Page Menu
Home
Phorge
Search
Configure Global Search
Log In
Files
F2891914
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Award Token
Flag For Later
Advanced/Developer...
View Handle
View Hovercard
Size
25 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/src/applications/search/fulltextstorage/PhabricatorElasticFulltextStorageEngine.php b/src/applications/search/fulltextstorage/PhabricatorElasticFulltextStorageEngine.php
index 2e456ec408..45011ba982 100644
--- a/src/applications/search/fulltextstorage/PhabricatorElasticFulltextStorageEngine.php
+++ b/src/applications/search/fulltextstorage/PhabricatorElasticFulltextStorageEngine.php
@@ -1,588 +1,587 @@
<?php
class PhabricatorElasticFulltextStorageEngine
extends PhabricatorFulltextStorageEngine {
private $index;
private $timeout;
private $version;
public function setService(PhabricatorSearchService $service) {
$this->service = $service;
$config = $service->getConfig();
$index = idx($config, 'path', '/phabricator');
$this->index = str_replace('/', '', $index);
$this->timeout = idx($config, 'timeout', 15);
$this->version = (int)idx($config, 'version', 5);
return $this;
}
public function getEngineIdentifier() {
return 'elasticsearch';
}
public function getTimestampField() {
return $this->version < 2 ?
'_timestamp' : 'lastModified';
}
public function getTextFieldType() {
return $this->version >= 5
? 'text' : 'string';
}
public function getHostType() {
return new PhabricatorElasticSearchHost($this);
}
/**
* @return PhabricatorElasticSearchHost
*/
public function getHostForRead() {
return $this->getService()->getAnyHostForRole('read');
}
/**
* @return PhabricatorElasticSearchHost
*/
public function getHostForWrite() {
return $this->getService()->getAnyHostForRole('write');
}
public function setTimeout($timeout) {
$this->timeout = $timeout;
return $this;
}
public function getTimeout() {
return $this->timeout;
}
public function getTypeConstants($class) {
$relationship_class = new ReflectionClass($class);
$typeconstants = $relationship_class->getConstants();
return array_unique(array_values($typeconstants));
}
public function reindexAbstractDocument(
PhabricatorSearchAbstractDocument $doc) {
$host = $this->getHostForWrite();
$type = $doc->getDocumentType();
$phid = $doc->getPHID();
$handle = id(new PhabricatorHandleQuery())
->setViewer(PhabricatorUser::getOmnipotentUser())
->withPHIDs(array($phid))
->executeOne();
$timestamp_key = $this->getTimestampField();
// URL is not used internally but it can be useful externally.
$spec = array(
'title' => $doc->getDocumentTitle(),
'url' => PhabricatorEnv::getProductionURI($handle->getURI()),
'dateCreated' => $doc->getDocumentCreated(),
$timestamp_key => $doc->getDocumentModified(),
);
foreach ($doc->getFieldData() as $field) {
list($field_name, $corpus, $aux) = $field;
if (!isset($spec[$field_name])) {
$spec[$field_name] = array($corpus);
} else {
$spec[$field_name][] = $corpus;
}
if ($aux != null) {
$spec[$field_name][] = $aux;
}
}
foreach ($doc->getRelationshipData() as $field) {
list($field_name, $related_phid, $rtype, $time) = $field;
if (!isset($spec[$field_name])) {
$spec[$field_name] = array($related_phid);
} else {
$spec[$field_name][] = $related_phid;
}
if ($time) {
$spec[$field_name.'_ts'] = $time;
}
}
$this->executeRequest($host, "/{$type}/{$phid}/", $spec, 'PUT');
}
public function reconstructDocument($phid) {
$type = phid_get_type($phid);
$host = $this->getHostForRead();
$response = $this->executeRequest($host, "/{$type}/{$phid}", array());
if (empty($response['exists'])) {
return null;
}
$hit = $response['_source'];
$doc = new PhabricatorSearchAbstractDocument();
$doc->setPHID($phid);
$doc->setDocumentType($response['_type']);
$doc->setDocumentTitle($hit['title']);
$doc->setDocumentCreated($hit['dateCreated']);
$doc->setDocumentModified($hit[$this->getTimestampField()]);
foreach ($hit['field'] as $fdef) {
$field_type = $fdef['type'];
$doc->addField($field_type, $hit[$field_type], $fdef['aux']);
}
foreach ($hit['relationship'] as $rtype => $rships) {
foreach ($rships as $rship) {
$doc->addRelationship(
$rtype,
$rship['phid'],
$rship['phidType'],
$rship['when']);
}
}
return $doc;
}
private function buildSpec(PhabricatorSavedQuery $query) {
$q = new PhabricatorElasticSearchQueryBuilder('bool');
$query_string = $query->getParameter('query');
if (strlen($query_string)) {
$fields = $this->getTypeConstants('PhabricatorSearchDocumentFieldType');
// Build a simple_query_string query over all fields that must match all
// of the words in the search string.
$q->addMustClause(array(
'simple_query_string' => array(
'query' => $query_string,
'fields' => array(
PhabricatorSearchDocumentFieldType::FIELD_TITLE.'.*',
PhabricatorSearchDocumentFieldType::FIELD_BODY.'.*',
PhabricatorSearchDocumentFieldType::FIELD_COMMENT.'.*',
),
'default_operator' => 'AND',
),
));
// This second query clause is "SHOULD' so it only affects ranking of
// documents which already matched the Must clause. This amplifies the
// score of documents which have an exact match on title, body
// or comments.
$q->addShouldClause(array(
'simple_query_string' => array(
'query' => $query_string,
'fields' => array(
'*.raw',
PhabricatorSearchDocumentFieldType::FIELD_TITLE.'^4',
PhabricatorSearchDocumentFieldType::FIELD_BODY.'^3',
PhabricatorSearchDocumentFieldType::FIELD_COMMENT.'^1.2',
),
'analyzer' => 'english_exact',
'default_operator' => 'and',
),
));
}
$exclude = $query->getParameter('exclude');
if ($exclude) {
$q->addFilterClause(array(
'not' => array(
'ids' => array(
'values' => array($exclude),
),
),
));
}
$relationship_map = array(
PhabricatorSearchRelationship::RELATIONSHIP_AUTHOR =>
$query->getParameter('authorPHIDs', array()),
PhabricatorSearchRelationship::RELATIONSHIP_SUBSCRIBER =>
$query->getParameter('subscriberPHIDs', array()),
PhabricatorSearchRelationship::RELATIONSHIP_PROJECT =>
$query->getParameter('projectPHIDs', array()),
PhabricatorSearchRelationship::RELATIONSHIP_REPOSITORY =>
$query->getParameter('repositoryPHIDs', array()),
);
$statuses = $query->getParameter('statuses', array());
$statuses = array_fuse($statuses);
$rel_open = PhabricatorSearchRelationship::RELATIONSHIP_OPEN;
$rel_closed = PhabricatorSearchRelationship::RELATIONSHIP_CLOSED;
$rel_unowned = PhabricatorSearchRelationship::RELATIONSHIP_UNOWNED;
$include_open = !empty($statuses[$rel_open]);
$include_closed = !empty($statuses[$rel_closed]);
if ($include_open && !$include_closed) {
$q->addExistsClause($rel_open);
} else if (!$include_open && $include_closed) {
$q->addExistsClause($rel_closed);
}
if ($query->getParameter('withUnowned')) {
$q->addExistsClause($rel_unowned);
}
$rel_owner = PhabricatorSearchRelationship::RELATIONSHIP_OWNER;
if ($query->getParameter('withAnyOwner')) {
$q->addExistsClause($rel_owner);
} else {
$owner_phids = $query->getParameter('ownerPHIDs', array());
if (count($owner_phids)) {
$q->addTermsClause($rel_owner, $owner_phids);
}
}
foreach ($relationship_map as $field => $phids) {
if (is_array($phids) && !empty($phids)) {
$q->addTermsClause($field, $phids);
}
}
if (!$q->getClauseCount('must')) {
$q->addMustClause(array('match_all' => array('boost' => 1 )));
}
$spec = array(
'_source' => false,
'query' => array(
'bool' => $q->toArray(),
),
);
if (!$query->getParameter('query')) {
$spec['sort'] = array(
array('dateCreated' => 'desc'),
);
}
$offset = (int)$query->getParameter('offset', 0);
$limit = (int)$query->getParameter('limit', 101);
if ($offset + $limit > 10000) {
throw new Exception(pht(
'Query offset is too large. offset+limit=%s (max=%s)',
$offset + $limit,
10000));
}
$spec['from'] = $offset;
$spec['size'] = $limit;
return $spec;
}
public function executeSearch(PhabricatorSavedQuery $query) {
$types = $query->getParameter('types');
if (!$types) {
$types = array_keys(
PhabricatorSearchApplicationSearchEngine::getIndexableDocumentTypes());
}
// Don't use '/_search' for the case that there is something
// else in the index (for example if 'phabricator' is only an alias to
// some bigger index). Use '/$types/_search' instead.
$uri = '/'.implode(',', $types).'/_search';
$spec = $this->buildSpec($query);
$exceptions = array();
foreach ($this->service->getAllHostsForRole('read') as $host) {
try {
$response = $this->executeRequest($host, $uri, $spec);
$phids = ipull($response['hits']['hits'], '_id');
return $phids;
} catch (Exception $e) {
$exceptions[] = $e;
}
}
throw new PhutilAggregateException(pht('All Fulltext Search hosts failed:'),
$exceptions);
}
public function indexExists(PhabricatorElasticSearchHost $host = null) {
if (!$host) {
$host = $this->getHostForRead();
}
try {
if ($this->version >= 5) {
$uri = '/_stats/';
$res = $this->executeRequest($host, $uri, array());
return isset($res['indices']['phabricator']);
} else if ($this->version >= 2) {
$uri = '';
} else {
$uri = '/_status/';
}
return (bool)$this->executeRequest($host, $uri, array());
} catch (HTTPFutureHTTPResponseStatus $e) {
if ($e->getStatusCode() == 404) {
return false;
}
throw $e;
}
}
private function getIndexConfiguration() {
$data = array();
$data['settings'] = array(
'index' => array(
'auto_expand_replicas' => '0-2',
'analysis' => array(
'filter' => array(
'english_stop' => array(
'type' => 'stop',
'stopwords' => '_english_',
),
'english_stemmer' => array(
'type' => 'stemmer',
'language' => 'english',
),
'english_possessive_stemmer' => array(
'type' => 'stemmer',
'language' => 'possessive_english',
),
),
'analyzer' => array(
'english_exact' => array(
'tokenizer' => 'standard',
'filter' => array('lowercase'),
),
'letter_stop' => array(
'tokenizer' => 'letter',
'filter' => array('lowercase', 'english_stop'),
),
'english_stem' => array(
'tokenizer' => 'standard',
'filter' => array(
'english_possessive_stemmer',
'lowercase',
'english_stop',
'english_stemmer',
),
),
),
),
),
);
$fields = $this->getTypeConstants('PhabricatorSearchDocumentFieldType');
$relationships = $this->getTypeConstants('PhabricatorSearchRelationship');
$doc_types = array_keys(
PhabricatorSearchApplicationSearchEngine::getIndexableDocumentTypes());
$text_type = $this->getTextFieldType();
foreach ($doc_types as $type) {
$properties = array();
foreach ($fields as $field) {
// Use the custom analyzer for the corpus of text
$properties[$field] = array(
'type' => $text_type,
'fields' => array(
'raw' => array(
'type' => $text_type,
'analyzer' => 'english_exact',
'search_analyzer' => 'english',
'search_quote_analyzer' => 'english_exact',
),
'keywords' => array(
'type' => $text_type,
'analyzer' => 'letter_stop',
),
'stems' => array(
'type' => $text_type,
'analyzer' => 'english_stem',
),
),
);
}
if ($this->version < 5) {
foreach ($relationships as $rel) {
$properties[$rel] = array(
'type' => 'string',
'index' => 'not_analyzed',
'include_in_all' => false,
);
$properties[$rel.'_ts'] = array(
'type' => 'date',
'include_in_all' => false,
);
}
} else {
foreach ($relationships as $rel) {
$properties[$rel] = array(
'type' => 'keyword',
'include_in_all' => false,
'doc_values' => false,
);
$properties[$rel.'_ts'] = array(
'type' => 'date',
'include_in_all' => false,
);
}
}
// Ensure we have dateCreated since the default query requires it
$properties['dateCreated']['type'] = 'date';
$properties['lastModified']['type'] = 'date';
$data['mappings'][$type]['properties'] = $properties;
}
return $data;
}
public function indexIsSane(PhabricatorElasticSearchHost $host = null) {
if (!$host) {
$host = $this->getHostForRead();
}
if (!$this->indexExists($host)) {
return false;
}
$cur_mapping = $this->executeRequest($host, '/_mapping/', array());
$cur_settings = $this->executeRequest($host, '/_settings/', array());
$actual = array_merge($cur_settings[$this->index],
$cur_mapping[$this->index]);
$res = $this->check($actual, $this->getIndexConfiguration());
return $res;
}
/**
* Recursively check if two Elasticsearch configuration arrays are equal
*
* @param $actual
* @param $required array
* @return bool
*/
private function check($actual, $required, $path = '') {
foreach ($required as $key => $value) {
if (!array_key_exists($key, $actual)) {
if ($key === '_all') {
// The _all field never comes back so we just have to assume it
// is set correctly.
continue;
}
return false;
}
if (is_array($value)) {
if (!is_array($actual[$key])) {
return false;
}
if (!$this->check($actual[$key], $value, $path.'.'.$key)) {
return false;
}
continue;
}
$actual[$key] = self::normalizeConfigValue($actual[$key]);
$value = self::normalizeConfigValue($value);
if ($actual[$key] != $value) {
return false;
}
}
return true;
}
/**
* Normalize a config value for comparison. Elasticsearch accepts all kinds
* of config values but it tends to throw back 'true' for true and 'false' for
* false so we normalize everything. Sometimes, oddly, it'll throw back false
* for false....
*
* @param mixed $value config value
* @return mixed value normalized
*/
private static function normalizeConfigValue($value) {
if ($value === true) {
return 'true';
} else if ($value === false) {
return 'false';
}
return $value;
}
public function initIndex() {
$host = $this->getHostForWrite();
if ($this->indexExists()) {
$this->executeRequest($host, '/', array(), 'DELETE');
}
$data = $this->getIndexConfiguration();
$this->executeRequest($host, '/', $data, 'PUT');
}
public function getIndexStats(PhabricatorElasticSearchHost $host = null) {
if ($this->version < 2) {
return false;
}
if (!$host) {
$host = $this->getHostForRead();
}
$uri = '/_stats/';
- $host = $this->getHostForRead();
$res = $this->executeRequest($host, $uri, array());
$stats = $res['indices'][$this->index];
return array(
pht('Queries') =>
idxv($stats, array('primaries', 'search', 'query_total')),
pht('Documents') =>
idxv($stats, array('total', 'docs', 'count')),
pht('Deleted') =>
idxv($stats, array('total', 'docs', 'deleted')),
pht('Storage Used') =>
phutil_format_bytes(idxv($stats,
array('total', 'store', 'size_in_bytes'))),
);
}
private function executeRequest(PhabricatorElasticSearchHost $host, $path,
array $data, $method = 'GET') {
$uri = $host->getURI($path);
$data = phutil_json_encode($data);
$future = new HTTPSFuture($uri, $data);
$future->addHeader('Content-Type', 'application/json');
if ($method != 'GET') {
$future->setMethod($method);
}
if ($this->getTimeout()) {
$future->setTimeout($this->getTimeout());
}
try {
list($body) = $future->resolvex();
} catch (HTTPFutureResponseStatus $ex) {
if ($ex->isTimeout() || (int)$ex->getStatusCode() > 499) {
$host->didHealthCheck(false);
}
throw $ex;
}
if ($method != 'GET') {
return null;
}
try {
$data = phutil_json_decode($body);
$host->didHealthCheck(true);
return $data;
} catch (PhutilJSONParserException $ex) {
$host->didHealthCheck(false);
throw new PhutilProxyException(
pht('ElasticSearch server returned invalid JSON!'),
$ex);
}
}
}
diff --git a/src/infrastructure/cluster/search/PhabricatorSearchService.php b/src/infrastructure/cluster/search/PhabricatorSearchService.php
index 59ef7c408d..10cf78d94b 100644
--- a/src/infrastructure/cluster/search/PhabricatorSearchService.php
+++ b/src/infrastructure/cluster/search/PhabricatorSearchService.php
@@ -1,254 +1,254 @@
<?php
class PhabricatorSearchService
extends Phobject {
const KEY_REFS = 'cluster.search.refs';
protected $config;
protected $disabled;
protected $engine;
protected $hosts = array();
protected $hostsConfig;
protected $hostType;
protected $roles = array();
const STATUS_OKAY = 'okay';
const STATUS_FAIL = 'fail';
const ROLE_WRITE = 'write';
const ROLE_READ = 'read';
public function __construct(PhabricatorFulltextStorageEngine $engine) {
$this->engine = $engine;
$this->hostType = $engine->getHostType();
}
/**
* @throws Exception
*/
public function newHost($config) {
$host = clone($this->hostType);
$host_config = $this->config + $config;
$host->setConfig($host_config);
$this->hosts[] = $host;
return $host;
}
public function getEngine() {
return $this->engine;
}
public function getDisplayName() {
return $this->hostType->getDisplayName();
}
public function getStatusViewColumns() {
return $this->hostType->getStatusViewColumns();
}
public function setConfig($config) {
$this->config = $config;
if (!isset($config['hosts'])) {
$config['hosts'] = array(
array(
'host' => idx($config, 'host'),
'port' => idx($config, 'port'),
'protocol' => idx($config, 'protocol'),
'roles' => idx($config, 'roles'),
),
);
}
foreach ($config['hosts'] as $host) {
$this->newHost($host);
}
}
public function getConfig() {
return $this->config;
}
public static function getConnectionStatusMap() {
return array(
self::STATUS_OKAY => array(
'icon' => 'fa-exchange',
'color' => 'green',
'label' => pht('Okay'),
),
self::STATUS_FAIL => array(
'icon' => 'fa-times',
'color' => 'red',
'label' => pht('Failed'),
),
);
}
public function isWritable() {
return (bool)$this->getAllHostsForRole(self::ROLE_WRITE);
}
public function isReadable() {
return (bool)$this->getAllHostsForRole(self::ROLE_READ);
}
public function getPort() {
return idx($this->config, 'port');
}
public function getProtocol() {
return idx($this->config, 'protocol');
}
public function getVersion() {
return idx($this->config, 'version');
}
public function getHosts() {
return $this->hosts;
}
/**
* Get a random host reference with the specified role, skipping hosts which
* failed recent health checks.
* @throws PhabricatorClusterNoHostForRoleException if no healthy hosts match.
* @return PhabricatorSearchHost
*/
public function getAnyHostForRole($role) {
$hosts = $this->getAllHostsForRole($role);
shuffle($hosts);
foreach ($hosts as $host) {
$health = $host->getHealthRecord();
if ($health->getIsHealthy()) {
return $host;
}
}
throw new PhabricatorClusterNoHostForRoleException($role);
}
/**
* Get all configured hosts for this service which have the specified role.
* @return PhabricatorSearchHost[]
*/
public function getAllHostsForRole($role) {
// if the role is explicitly set to false at the top level, then all hosts
// have the role disabled.
if (idx($this->config, $role) === false) {
return array();
}
$hosts = array();
foreach ($this->hosts as $host) {
if ($host->hasRole($role)) {
$hosts[] = $host;
}
}
return $hosts;
}
/**
* Get a reference to all configured fulltext search cluster services
* @return PhabricatorSearchService[]
*/
public static function getAllServices() {
$cache = PhabricatorCaches::getRequestCache();
$refs = $cache->getKey(self::KEY_REFS);
if (!$refs) {
$refs = self::newRefs();
$cache->setKey(self::KEY_REFS, $refs);
}
return $refs;
}
/**
* Load all valid PhabricatorFulltextStorageEngine subclasses
*/
public static function loadAllFulltextStorageEngines() {
return id(new PhutilClassMapQuery())
->setAncestorClass('PhabricatorFulltextStorageEngine')
->setUniqueMethod('getEngineIdentifier')
->execute();
}
/**
* Create instances of PhabricatorSearchService based on configuration
* @return PhabricatorSearchService[]
*/
public static function newRefs() {
$services = PhabricatorEnv::getEnvConfig('cluster.search');
$engines = self::loadAllFulltextStorageEngines();
$refs = array();
foreach ($services as $config) {
// Normally, we've validated configuration before we get this far, but
// make sure we don't fatal if we end up here with a bogus configuration.
if (!isset($engines[$config['type']])) {
throw new Exception(
pht(
'Configured search engine type "%s" is unknown. Valid engines '.
'are: %s.',
$config['type'],
implode(', ', array_keys($engines))));
}
- $engine = $engines[$config['type']];
+ $engine = clone($engines[$config['type']]);
$cluster = new self($engine);
$cluster->setConfig($config);
$engine->setService($cluster);
$refs[] = $cluster;
}
return $refs;
}
/**
* (re)index the document: attempt to pass the document to all writable
* fulltext search hosts
* @throws PhabricatorClusterNoHostForRoleException
*/
public static function reindexAbstractDocument(
PhabricatorSearchAbstractDocument $doc) {
$indexed = 0;
foreach (self::getAllServices() as $service) {
$hosts = $service->getAllHostsForRole('write');
if (count($hosts)) {
$service->getEngine()->reindexAbstractDocument($doc);
$indexed++;
}
}
if ($indexed == 0) {
throw new PhabricatorClusterNoHostForRoleException('write');
}
}
/**
* Execute a full-text query and return a list of PHIDs of matching objects.
* @return string[]
* @throws PhutilAggregateException
*/
public static function executeSearch(PhabricatorSavedQuery $query) {
$exceptions = array();
// try all services until one succeeds
foreach (self::getAllServices() as $service) {
try {
$engine = $service->getEngine();
$res = $engine->executeSearch($query);
// return immediately if we get results
return $res;
} catch (Exception $ex) {
$exceptions[] = $ex;
}
}
$msg = pht('All of the configured Fulltext Search services failed.');
throw new PhutilAggregateException($msg, $exceptions);
}
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Jan 19 2025, 15:56 (7 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1126234
Default Alt Text
(25 KB)
Attached To
Mode
rP Phorge
Attached
Detach File
Event Timeline
Log In to Comment