/**
 * Run the ApplyOperation job.
 *
 * Validates the job arguments, installs the Tripod config carried in the job
 * arguments, then creates an impacted subject for every entry under the
 * subjects key and calls update() on it. A timer stat is recorded per subject
 * (keyed by the subject's 'operation') and one for the job as a whole.
 *
 * @throws \Exception Anything thrown while processing is counted as a failure stat, logged and re-thrown
 */
public function perform()
{
    try {
        $this->debugLog("[JOBID " . $this->job->payload['id'] . "] ApplyOperation::perform() start");

        $timer = new \Tripod\Timer();
        $timer->start();

        $this->validateArgs();

        // NOTE(review): a local $statsConfig used to be assembled here from
        // $this->args['statsConfig'] but nothing in this method ever read it, so it
        // has been removed as dead code. Confirm it was not meant to be forwarded.

        // set the config to what is received
        \Tripod\Mongo\Config::setConfig($this->args[self::TRIPOD_CONFIG_KEY]);

        $this->getStat()->increment(
            MONGO_QUEUE_APPLY_OPERATION_JOB . '.' . SUBJECT_COUNT,
            count($this->args[self::SUBJECTS_KEY])
        );

        foreach ($this->args[self::SUBJECTS_KEY] as $subject) {
            $opTimer = new \Tripod\Timer();
            $opTimer->start();

            $impactedSubject = $this->createImpactedSubject($subject);
            $impactedSubject->update();

            $opTimer->stop();
            // stat time taken to perform operation for the given subject
            $this->getStat()->timer(MONGO_QUEUE_APPLY_OPERATION . '.' . $subject['operation'], $opTimer->result());
        }

        $timer->stop();
        // stat time taken to process job, from time it was picked up
        $this->getStat()->timer(MONGO_QUEUE_APPLY_OPERATION_SUCCESS, $timer->result());

        $this->debugLog("[JOBID " . $this->job->payload['id'] . "] ApplyOperation::perform() done in {$timer->result()}ms");
    } catch (\Exception $e) {
        // count and log the failure, then let the queue worker see the original exception
        $this->getStat()->increment(MONGO_QUEUE_APPLY_OPERATION_FAIL);
        $this->errorLog("Caught exception in " . get_class($this) . ": " . $e->getMessage());
        throw $e;
    }
}
$tableSpec = \Tripod\Mongo\Config::getInstance()->getTableSpecification($storeName, $tableId); if (array_key_exists("from", $tableSpec)) { \Tripod\Mongo\Config::getInstance()->setMongoCursorTimeout(-1); print "Generating {$tableId}"; $tripod = new \Tripod\Mongo\Driver($tableSpec['from'], $storeName, array('stat' => $stat)); $tTables = $tripod->getTripodTables(); if ($id) { print " for {$id}....\n"; $tTables->generateTableRows($tableId, $id); } else { print " for all tables....\n"; $tTables->generateTableRows($tableId, null, null, $queue); } } } $t = new \Tripod\Timer(); $t->start(); \Tripod\Mongo\Config::setConfig(json_decode(file_get_contents($configLocation), true)); if (isset($options['s']) || isset($options['storename'])) { $storeName = isset($options['s']) ? $options['s'] : $options['storename']; } else { $storeName = null; } if (isset($options['t']) || isset($options['spec'])) { $tableId = isset($options['t']) ? $options['t'] : $options['spec']; } else { $tableId = null; } if (isset($options['i']) || isset($options['id'])) { $id = isset($options['i']) ? $options['i'] : $options['id']; } else {
/**
 * Generates table rows of type $tableType from the CBD documents matching the
 * table specification's rdf:type(s).
 *
 * When $queueName is given and no $resource is specified, each matching document
 * is handed to an ApplyOperation job on that queue instead of being built inline.
 * Otherwise the row is assembled (fields, joins, counts, computed fields), temporary
 * fields are stripped, and the row is saved to the table collection.
 *
 * @param string $tableType
 * @param string|null $resource
 * @param string|null $context
 * @param string|null $queueName Queue for background bulk generation
 * @return null //@todo: this should be a bool
 */
public function generateTableRows($tableType, $resource = null, $context = null, $queueName = null)
{
    $timer = new \Tripod\Timer();
    $timer->start();

    $this->temporaryFields = [];

    $tableSpec = Config::getInstance()->getTableSpecification($this->storeName, $tableType);
    $collection = $this->config->getCollectionForTable($this->storeName, $tableType);

    if ($tableSpec == null) {
        $this->debugLog("Could not find a table specification for {$tableType}");
        return null;
    }

    // ensure that the ID field, view type, and the impactIndex indexes are correctly set up
    $inBackground = ['background' => 1];
    $collection->createIndex(['_id.r' => 1, '_id.c' => 1, '_id.type' => 1], $inBackground);
    $collection->createIndex(['_id.type' => 1], $inBackground);
    $collection->createIndex(['value.' . _IMPACT_INDEX => 1], $inBackground);

    // ensure any custom indexes declared by specs that write to the same data source
    foreach (Config::getInstance()->getTableSpecifications($this->storeName) as $otherSpec) {
        if (!isset($otherSpec['ensureIndexes']) || $otherSpec['to_data_source'] != $tableSpec['to_data_source']) {
            continue;
        }
        foreach ($otherSpec['ensureIndexes'] as $customIndex) {
            $this->ensureIndex($collection, $customIndex);
        }
    }

    // default the context
    $contextAlias = $this->getContextAlias($context);

    // default collection
    $from = isset($tableSpec["from"]) ? $tableSpec["from"] : $this->podName;

    // match either the qname or the full-URI alias of every declared type
    $declaredTypes = is_array($tableSpec["type"]) ? $tableSpec["type"] : [$tableSpec["type"]];
    $types = [];
    foreach ($declaredTypes as $declaredType) {
        $types[] = ["rdf:type.u" => $this->labeller->qname_to_alias($declaredType)];
        $types[] = ["rdf:type.u" => $this->labeller->uri_to_alias($declaredType)];
    }

    $filter = ['$or' => $types];
    if (isset($resource)) {
        $filter["_id"] = [
            _ID_RESOURCE => $this->labeller->uri_to_alias($resource),
            _ID_CONTEXT => $contextAlias,
        ];
    }

    $docs = $this->config
        ->getCollectionForCBD($this->storeName, $from)
        ->find($filter, ['maxTimeMS' => 1000000]);

    // background generation only applies to bulk runs (no single resource requested)
    $deferToQueue = $queueName && !$resource;

    foreach ($docs as $doc) {
        if ($deferToQueue) {
            $subject = new ImpactedSubject($doc['_id'], OP_TABLES, $this->storeName, $from, [$tableType]);

            $jobOptions = [];
            if ($this->stat || !empty($this->statsConfig)) {
                $jobOptions['statsConfig'] = $this->getStatsConfig();
            }

            $this->getApplyOperation()->createJob([$subject], $queueName, $jobOptions);
            continue;
        }

        // everything must go in the value object todo: this is a hang over from map reduce days,
        // engineer out once we have stability on new PHP method for M/R
        $value = ['_id' => $doc['_id']];

        // need to add the doc to the impact index to be consistent with views/search etc.
        // this is needed for discovering impacted operations
        $this->addIdToImpactIndex($doc['_id'], $value);
        $this->addFields($doc, $tableSpec, $value);

        if (isset($tableSpec['joins'])) {
            $this->doJoins($doc, $tableSpec['joins'], $value, $from, $contextAlias);
        }
        if (isset($tableSpec['counts'])) {
            $this->doCounts($doc, $tableSpec['counts'], $value);
        }
        if (isset($tableSpec['computed_fields'])) {
            $this->doComputedFields($tableSpec, $value);
        }

        $generatedRow = [
            // set up ID
            "_id" => [
                _ID_RESOURCE => $doc["_id"][_ID_RESOURCE],
                _ID_CONTEXT => $doc["_id"][_ID_CONTEXT],
                _ID_TYPE => $tableSpec['_id'],
            ],
            // Remove temp fields from document
            'value' => array_diff_key($value, array_flip($this->temporaryFields)),
        ];

        $this->truncatingSave($collection, $generatedRow);
    }

    $timer->stop();
    $this->timingLog(MONGO_CREATE_TABLE, [
        'type' => $tableSpec['type'],
        'duration' => $timer->result(),
        'filter' => $filter,
        'from' => $from,
    ]);
    $this->getStat()->timer(MONGO_CREATE_TABLE . ".{$tableType}", $timer->result());
}
* @param string $subject * @param array $triples * @param array $errors * @param string $podName * @param string $storeName */ function load(\Tripod\Mongo\TriplesUtil $loader, $subject, array $triples, array &$errors, $podName, $storeName) { try { $loader->loadTriplesAbout($subject, $triples, $storeName, $podName); } catch (Exception $e) { print "Exception for subject {$subject} failed with message: " . $e->getMessage() . "\n"; $errors[] = $subject; } } $timer = new \Tripod\Timer(); $timer->start(); if ($argc != 4) { echo "usage: ./loadTriples.php storename podname tripodConfig.json < ntriplesdata\n"; die; } array_shift($argv); $storeName = $argv[0]; $podName = $argv[1]; \Tripod\Mongo\Config::setConfig(json_decode(file_get_contents($argv[2]), true)); $i = 0; $currentSubject = ""; $triples = array(); $errors = array(); // array of subjects that failed to insert, even after retry... $loader = new \Tripod\Mongo\TriplesUtil();
$viewSpec = \Tripod\Mongo\Config::getInstance()->getViewSpecification($storeName, $viewId); if (array_key_exists("from", $viewSpec)) { \Tripod\Mongo\Config::getInstance()->setMongoCursorTimeout(-1); print "Generating {$viewId}"; $tripod = new \Tripod\Mongo\Driver($viewSpec['from'], $storeName, array('stat' => $stat)); $views = $tripod->getTripodViews(); if ($id) { print " for {$id}....\n"; $views->generateView($viewId, $id, null, $queue); } else { print " for all views....\n"; $views->generateView($viewId, null, null, $queue); } } } $t = new \Tripod\Timer(); $t->start(); \Tripod\Mongo\Config::setConfig(json_decode(file_get_contents($configLocation), true)); if (isset($options['s']) || isset($options['storename'])) { $storeName = isset($options['s']) ? $options['s'] : $options['storename']; } else { $storeName = null; } if (isset($options['v']) || isset($options['spec'])) { $viewId = isset($options['v']) ? $options['v'] : $options['spec']; } else { $viewId = null; } if (isset($options['i']) || isset($options['id'])) { $id = isset($options['i']) ? $options['i'] : $options['id']; } else {
/**
 * Run the DiscoverImpactedSubjects job.
 *
 * For every operation listed in the job arguments, asks the matching composite
 * which subjects are impacted by the supplied changes, assigns each impacted
 * subject to a queue, and then creates ApplyOperation jobs for the subjects
 * collected per queue.
 *
 * Queue selection for a subject:
 *  - if the job arguments name a queue, that queue is used;
 *  - else if the subject has no spec types, the default apply queue is used;
 *  - otherwise the subject's spec types are grouped by the 'queue' declared in
 *    their specification (default apply queue when none is declared) and one
 *    ImpactedSubject is queued per group.
 *
 * @throws \Exception Anything thrown while processing is counted as a failure stat, logged and re-thrown
 */
public function perform()
{
    try {
        $this->debugLog("[JOBID " . $this->job->payload['id'] . "] DiscoverImpactedSubjects::perform() start");
        $timer = new \Tripod\Timer();
        $timer->start();
        $this->validateArgs();

        // set the config to what is received
        \Tripod\Mongo\Config::setConfig($this->args[self::TRIPOD_CONFIG_KEY]);

        // stats configuration, when supplied, is passed to the driver and to the jobs created below
        $statsConfig = array();
        if (isset($this->args['statsConfig'])) {
            $statsConfig['statsConfig'] = $this->args['statsConfig'];
        }

        $tripod = $this->getTripod($this->args[self::STORE_NAME_KEY], $this->args[self::POD_NAME_KEY], $statsConfig);
        $operations = $this->args[self::OPERATIONS_KEY];
        $subjectsAndPredicatesOfChange = $this->args[self::CHANGES_KEY];
        $subjectCount = 0;

        foreach ($operations as $op) {
            /** @var \Tripod\Mongo\Composites\IComposite $composite */
            $composite = $tripod->getComposite($op);
            $modifiedSubjects = $composite->getImpactedSubjects($subjectsAndPredicatesOfChange, $this->args[self::CONTEXT_ALIAS_KEY]);
            if (!empty($modifiedSubjects)) {
                /* @var $subject \Tripod\Mongo\ImpactedSubject */
                foreach ($modifiedSubjects as $subject) {
                    $subjectCount++;
                    $subjectTimer = new \Tripod\Timer();
                    $subjectTimer->start();
                    if (isset($this->args[self::QUEUE_KEY]) || count($subject->getSpecTypes()) == 0) {
                        // an explicit queue in the job args wins; a subject without spec types goes to the default apply queue
                        $queueName = isset($this->args[self::QUEUE_KEY]) ? $this->args[self::QUEUE_KEY] : Config::getApplyQueueName();
                        $this->addSubjectToQueue($subject, $queueName);
                    } else {
                        // group this subject's spec types by the queue their specification asks for
                        $specsGroupedByQueue = array();
                        foreach ($subject->getSpecTypes() as $specType) {
                            $spec = null;
                            // look the spec up in the configuration section that matches the operation
                            switch ($subject->getOperation()) {
                                case OP_VIEWS:
                                    $spec = Config::getInstance()->getViewSpecification($this->args[self::STORE_NAME_KEY], $specType);
                                    break;
                                case OP_TABLES:
                                    $spec = Config::getInstance()->getTableSpecification($this->args[self::STORE_NAME_KEY], $specType);
                                    break;
                                case OP_SEARCH:
                                    $spec = Config::getInstance()->getSearchDocumentSpecification($this->args[self::STORE_NAME_KEY], $specType);
                                    break;
                            }
                            // no spec found, or spec without a 'queue' entry: fall back to the default apply queue
                            if (!$spec || !isset($spec['queue'])) {
                                if (!$spec) {
                                    $spec = array();
                                }
                                $spec['queue'] = Config::getApplyQueueName();
                            }
                            if (!isset($specsGroupedByQueue[$spec['queue']])) {
                                $specsGroupedByQueue[$spec['queue']] = array();
                            }
                            $specsGroupedByQueue[$spec['queue']][] = $specType;
                        }
                        // queue one subject per target queue, carrying only the spec types destined for it
                        foreach ($specsGroupedByQueue as $queueName => $specs) {
                            $queuedSubject = new \Tripod\Mongo\ImpactedSubject($subject->getResourceId(), $subject->getOperation(), $subject->getStoreName(), $subject->getPodName(), $specs);
                            $this->addSubjectToQueue($queuedSubject, $queueName);
                        }
                    }
                    $subjectTimer->stop();
                    // stat time taken to discover impacted subjects for the given subject of change
                    $this->getStat()->timer(MONGO_QUEUE_DISCOVER_SUBJECT, $subjectTimer->result());
                }
                // create one ApplyOperation job per queue, then reset the per-queue buffer for the next operation
                // (presumably addSubjectToQueue() is what fills $this->subjectsGroupedByQueue - confirm in that method)
                if (!empty($this->subjectsGroupedByQueue)) {
                    foreach ($this->subjectsGroupedByQueue as $queueName => $subjects) {
                        $this->getApplyOperation()->createJob($subjects, $queueName, $statsConfig);
                    }
                    $this->subjectsGroupedByQueue = array();
                }
            }
        }

        // stat time taken to process item, measured from the start of this perform() call
        $timer->stop();
        $this->getStat()->timer(MONGO_QUEUE_DISCOVER_SUCCESS, $timer->result());
        $this->debugLog("[JOBID " . $this->job->payload['id'] . "] DiscoverImpactedSubjects::perform() done in {$timer->result()}ms");
        $this->getStat()->increment(MONGO_QUEUE_DISCOVER_JOB . '.' . SUBJECT_COUNT, $subjectCount);
    } catch (\Exception $e) {
        // count and log the failure, then re-throw the original exception unchanged
        $this->getStat()->increment(MONGO_QUEUE_DISCOVER_FAIL);
        $this->errorLog("Caught exception in " . get_class($this) . ": " . $e->getMessage());
        throw $e;
    }
}
/**
 * Generates views of type $viewId, either for a single $resource or for every
 * CBD document whose rdf:type matches the view specification.
 *
 * When $queueName is given and no $resource is specified, each matching document
 * is handed to an ApplyOperation job on that queue; otherwise the view document
 * is built inline and upserted into the view collection.
 *
 * @param string $viewId
 * @param string|null $resource
 * @param string|null $context
 * @param string|null $queueName Queue for background bulk generation
 * @throws \Tripod\Exceptions\ViewException When the view specification has no joins
 * @return null Null is returned explicitly when no specification exists for $viewId; otherwise nothing is returned
 */
public function generateView($viewId, $resource = null, $context = null, $queueName = null)
{
    $contextAlias = $this->getContextAlias($context);
    $viewSpec = Config::getInstance()->getViewSpecification($this->storeName, $viewId);

    if ($viewSpec == null) {
        $this->debugLog("Could not find a view specification for {$resource} with viewId '{$viewId}'");
        return null;
    }

    $timer = new \Tripod\Timer();
    $timer->start();

    $from = $this->getFromCollectionForViewSpec($viewSpec);
    $collection = $this->config->getCollectionForView($this->storeName, $viewId);

    if (!isset($viewSpec['joins'])) {
        throw new \Tripod\Exceptions\ViewException('Could not find any joins in view specification - usecase better served with select()');
    }

    // ensure that the ID field, view type, and the impactIndex indexes are correctly set up
    $inBackground = ['background' => 1];
    $collection->createIndex(['_id.r' => 1, '_id.c' => 1, '_id.type' => 1], $inBackground);
    $collection->createIndex(['_id.type' => 1], $inBackground);
    $collection->createIndex(['value.' . _IMPACT_INDEX => 1], $inBackground);

    // ensure any custom view indexes
    if (isset($viewSpec['ensureIndexes'])) {
        foreach ($viewSpec['ensureIndexes'] as $customIndex) {
            $collection->createIndex($customIndex, ['background' => 1]);
        }
    }

    // this is used to filter the CBD table to speed up the view creation:
    // match either the qname or the full-URI alias of every declared type
    $declaredTypes = is_array($viewSpec["type"]) ? $viewSpec["type"] : [$viewSpec["type"]];
    $types = [];
    foreach ($declaredTypes as $declaredType) {
        $types[] = ["rdf:type.u" => $this->labeller->qname_to_alias($declaredType)];
        $types[] = ["rdf:type.u" => $this->labeller->uri_to_alias($declaredType)];
    }

    $filter = ['$or' => $types];
    if (isset($resource)) {
        $resourceAlias = $this->labeller->uri_to_alias($resource);
        $filter["_id"] = [_ID_RESOURCE => $resourceAlias, _ID_CONTEXT => $contextAlias];
    }

    $docs = $this->config
        ->getCollectionForCBD($this->storeName, $from)
        ->find($filter, ['maxTimeMS' => \Tripod\Mongo\Config::getInstance()->getMongoCursorTimeout()]);

    // background generation only applies to bulk runs (no single resource requested)
    $deferToQueue = $queueName && !$resource;

    foreach ($docs as $doc) {
        if ($deferToQueue) {
            $subject = new ImpactedSubject($doc['_id'], OP_VIEWS, $this->storeName, $from, [$viewId]);

            $jobOptions = [];
            if ($this->stat || !empty($this->statsConfig)) {
                $jobOptions['statsConfig'] = $this->getStatsConfig();
            }

            $this->getApplyOperation()->createJob([$subject], $queueName, $jobOptions);
            continue;
        }

        // set up ID
        $id = [
            "_id" => [
                _ID_RESOURCE => $doc["_id"][_ID_RESOURCE],
                _ID_CONTEXT => $doc["_id"][_ID_CONTEXT],
                _ID_TYPE => $viewSpec['_id'],
            ],
        ];

        // everything must go in the value object todo: this is a hang over from map reduce days,
        // engineer out once we have stability on new PHP method for M/R
        $value = [_GRAPHS => []];

        // views with a ttl carry an expiry date and no impact index; all others start
        // their impact index with the base document's id
        $buildImpactIndex = !isset($viewSpec['ttl']);
        if ($buildImpactIndex) {
            $value[_IMPACT_INDEX] = [$doc['_id']];
        } else {
            $value[_EXPIRES] = \Tripod\Mongo\DateUtil::getMongoDate($this->getExpirySecFromNow($viewSpec['ttl']) * 1000);
        }

        $this->doJoins($doc, $viewSpec['joins'], $value, $from, $contextAlias, $buildImpactIndex);

        // add top level properties
        $value[_GRAPHS][] = $this->extractProperties($doc, $viewSpec, $from);

        $generatedView = $id;
        $generatedView['value'] = $value;

        $collection->replaceOne($id, $generatedView, ['upsert' => true]);
    }

    $timer->stop();
    $this->timingLog(MONGO_CREATE_VIEW, [
        'view' => $viewSpec['type'],
        'duration' => $timer->result(),
        'filter' => $filter,
        'from' => $from,
    ]);
    $this->getStat()->timer(MONGO_CREATE_VIEW . ".{$viewId}", $timer->result());
}
$spec = \Tripod\Mongo\Config::getInstance()->getSearchDocumentSpecification($storeName, $specId);
if (array_key_exists("from", $spec)) {
    // disable the cursor timeout for what may be a long-running bulk generation
    \Tripod\Mongo\Config::getInstance()->setMongoCursorTimeout(-1);
    print "Generating {$specId}";
    $tripod = new \Tripod\Mongo\Driver($spec['from'], $storeName, array('stat' => $stat));
    $search = $tripod->getSearchIndexer();
    if ($id) {
        print " for {$id}....\n";
        $search->generateSearchDocuments($specId, $id, null, $queue);
    } else {
        print " for all tables....\n";
        $search->generateSearchDocuments($specId, null, null, $queue);
    }
}
}

$t = new \Tripod\Timer();
$t->start();
\Tripod\Mongo\Config::setConfig(json_decode(file_get_contents($configLocation), true));

// store name: -s or --storename
if (isset($options['s']) || isset($options['storename'])) {
    $storeName = isset($options['s']) ? $options['s'] : $options['storename'];
} else {
    $storeName = null;
}

// search document spec id: -d or --spec
// (the short form previously read $options['t'], a key this branch never checks, so -d was silently ignored)
if (isset($options['d']) || isset($options['spec'])) {
    $specId = isset($options['d']) ? $options['d'] : $options['spec'];
} else {
    $specId = null;
}

// resource id: -i or --id
if (isset($options['i']) || isset($options['id'])) {
    $id = isset($options['i']) ? $options['i'] : $options['id'];
} else {
<?php

// Command-line script: ensures the MongoDB indexes declared in a Tripod config,
// optionally for a single store and optionally forcing a re-index.

require_once dirname(__FILE__) . '/common.inc.php';
require_once dirname(dirname(dirname(__FILE__))) . '/src/tripod.inc.php';

if ($argc != 2 && $argc != 3 && $argc != 4) {
    echo "usage: php ensureIndexes.php tripodConfig.json [storeName] [forceReindex (default is false)]\n";
    die;
}
array_shift($argv);

// Fail with a clear message instead of handing a null config to Tripod
$configJson = file_get_contents($argv[0]);
if ($configJson === false) {
    fwrite(STDERR, "Unable to read config file {$argv[0]}\n");
    exit(1);
}
$config = json_decode($configJson, true);
if (!is_array($config)) {
    fwrite(STDERR, "Config file {$argv[0]} does not contain a valid JSON document\n");
    exit(1);
}
\Tripod\Mongo\Config::setConfig($config);

$storeName = isset($argv[1]) ? $argv[1] : null;
// only the literal string "true" enables a forced re-index
$forceReindex = isset($argv[2]) && $argv[2] == "true";

\Tripod\Mongo\Config::getInstance()->setMongoCursorTimeout(-1);

$ei = new \Tripod\Mongo\IndexUtils();

$t = new \Tripod\Timer();
$t->start();
print "About to start indexing on {$storeName}...\n";
$ei->ensureIndexes($forceReindex, $storeName);
$t->stop();
// Timer::result() is reported as milliseconds everywhere else it is printed or logged
print "Indexing complete, took {$t->result()} ms\n";
/**
 * Runs $query against a collection and loads every matching document into a MongoGraph.
 *
 * Iterating the cursor is retried up to Config::CONNECTION_RETRIES times (with a
 * one second pause after each failure). Each retry re-issues the query and starts
 * from an empty graph, because a cursor that has started iterating cannot be
 * iterated again and a partially filled graph must not be mixed with a re-read.
 *
 * For MONGO_VIEW results carrying an expiry date in the past, the view is
 * regenerated via generateView().
 * NOTE(review): the document already fetched is still the one added to the returned
 * graph; the regenerated view is not re-read within this call. Confirm that is intended.
 *
 * @param array $query
 * @param string $type Used as the timing-log category and stat key prefix
 * @param Collection|null $collection Defaults to $this->collection
 * @param array $includeProperties Properties to project; when empty, whole documents are fetched
 * @param int $cursorSize Batch size, applied only when $includeProperties is non-empty
 * @return MongoGraph
 * @throws \Exception When the cursor could not be read after all attempts; the last failure is chained as previous
 */
protected function fetchGraph($query, $type, $collection = null, $includeProperties = array(), $cursorSize = 101)
{
    $graph = new MongoGraph();

    $t = new \Tripod\Timer();
    $t->start();

    if ($collection == null) {
        $collection = $this->collection;
    }
    $collectionName = $collection->getCollectionName();

    $findOptions = array();
    if (!empty($includeProperties)) {
        $fields = array();
        foreach ($includeProperties as $property) {
            $fields[$this->labeller->uri_to_alias($property)] = true;
        }
        $findOptions = array('projection' => $fields, 'batchSize' => $cursorSize);
    }

    // The first query is issued outside the retry loop, so a failure to run it at all
    // still propagates to the caller unchanged.
    $cursor = $collection->find($query, $findOptions);

    $attempt = 0;
    $exception = null;
    $cursorSuccess = false;
    do {
        $attempt++;
        try {
            if ($cursor === null) {
                // previous attempt failed: run the query again rather than reusing a consumed cursor
                $cursor = $collection->find($query, $findOptions);
            }
            foreach ($cursor as $result) {
                // handle MONGO_VIEWS that have expired due to ttl. These are expired
                // on read (lazily) rather than on write
                if ($type == MONGO_VIEW && array_key_exists(_EXPIRES, $result['value'])) {
                    // if expires < current date, regenerate view..
                    $currentDate = \Tripod\Mongo\DateUtil::getMongoDate();
                    if ($result['value'][_EXPIRES]->__toString() < $currentDate) {
                        // regenerate!
                        $this->generateView($result['_id']['type'], $result['_id']['r']);
                    }
                }
                $graph->add_tripod_array($result);
            }
            $cursorSuccess = true;
        } catch (\Exception $e) {
            self::getLogger()->error("CursorException attempt " . $attempt . ". Retrying...:" . $e->getMessage());
            sleep(1);
            $exception = $e;
            // discard the failed cursor and anything partially read from it
            $cursor = null;
            $graph = new MongoGraph();
        }
    } while ($cursorSuccess === false && $attempt < Config::CONNECTION_RETRIES);

    if ($cursorSuccess === false) {
        self::getLogger()->error("CursorException failed after " . $attempt . " attempts (MAX:" . Config::CONNECTION_RETRIES . "): " . $exception->getMessage());
        // keep the generic \Exception type callers already catch, but chain the real cause
        throw new \Exception($exception->getMessage(), (int) $exception->getCode(), $exception);
    }

    $t->stop();
    $this->timingLog($type, array('duration' => $t->result(), 'query' => $query, 'collection' => $collectionName));

    if ($type == MONGO_VIEW) {
        // view timings are keyed by view type, when the query names one
        if (array_key_exists("_id.type", $query)) {
            $this->getStat()->timer($type . '.' . $query["_id.type"], $t->result());
        } elseif (array_key_exists("_id", $query) && array_key_exists("type", $query["_id"])) {
            $this->getStat()->timer($type . '.' . $query["_id"]["type"], $t->result());
        }
    } else {
        $this->getStat()->timer("{$type}.{$collectionName}", $t->result());
    }

    return $graph;
}
/**
 * Selects $fields from the documents matching $query and flattens each document
 * into a row of plain values.
 *
 * The context is added to the query first: to an `_id` clause that names a resource
 * but no context, to every resource-bearing operand of a `$` operator inside `_id`,
 * or as a separate `_id.<context>` clause when the query has no `_id` at all.
 *
 * @param array $query
 * @param array $fields array of fields, in the same format as prescribed by MongoPHP
 * @param null $sortBy
 * @param null $limit Skip/limit are only applied when this is non-empty
 * @param int $offset
 * @param null $context
 * @return array With keys "head" (count, offset, limit) and "results" (list of rows)
 */
public function select($query, $fields, $sortBy = null, $limit = null, $offset = 0, $context = null)
{
    $timer = new \Tripod\Timer();
    $timer->start();

    $contextAlias = $this->getContextAlias($context);

    // make sure context is represented - but not at the expense of $ operands queries failing
    if (!array_key_exists('_id', $query)) {
        // this query did not have _id referenced at all - just add an _id.c clause
        $query["_id." . _ID_CONTEXT] = $contextAlias;
    } elseif (is_array($query["_id"])) {
        $idClause = $query["_id"];
        if (!array_key_exists(_ID_CONTEXT, $idClause) && array_key_exists(_ID_RESOURCE, $idClause)) {
            // add context
            $query["_id"][_ID_CONTEXT] = $contextAlias;
        } else {
            // look inside $ operands for resource clauses that need the context
            foreach ($idClause as $operator => $operands) {
                if (substr($operator, 0, 1) != '$' || !is_array($operands)) {
                    continue;
                }
                foreach ($operands as $position => $operand) {
                    if (is_array($operand) && array_key_exists(_ID_RESOURCE, $operand)) {
                        $query["_id"][$operator][$position][_ID_CONTEXT] = $contextAlias;
                    }
                }
            }
        }
    }

    $findOptions = ['projection' => $fields];
    if (!empty($limit)) {
        $findOptions['skip'] = (int) $offset;
        $findOptions['limit'] = (int) $limit;
    }
    if (isset($sortBy)) {
        $findOptions['sort'] = $sortBy;
    }

    $cursor = $this->collection->find($query, $findOptions);

    $timer->stop();
    $this->timingLog(MONGO_SELECT, ['duration' => $timer->result(), 'query' => $query]);
    $this->getStat()->timer(MONGO_SELECT . ".{$this->podName}", $timer->result());

    $count = $this->collection->count($query);

    $rows = [];
    foreach ($cursor as $doc) {
        $row = [];
        foreach ($doc as $property => $propertyValue) {
            if ($property == _ID_KEY || $property === _VERSION) {
                // id and version are copied through untouched
                $row[$property] = $propertyValue;
            } elseif (array_key_exists(VALUE_LITERAL, $propertyValue)) {
                $row[$property] = $propertyValue[VALUE_LITERAL];
            } elseif (array_key_exists(VALUE_URI, $propertyValue)) {
                $row[$property] = $propertyValue[VALUE_URI];
            } else {
                // possible array of values
                $row[$property] = [];
                foreach ($propertyValue as $single) {
                    if (array_key_exists(VALUE_LITERAL, $single)) {
                        $row[$property][] = $single[VALUE_LITERAL];
                    } else {
                        $row[$property][] = $single[VALUE_URI];
                    }
                }
            }
        }
        $rows[] = $row;
    }

    return [
        "head" => ["count" => $count, "offset" => $offset, "limit" => $limit],
        "results" => $rows,
    ];
}
/**
 * Generates one search document per search specification registered for any of
 * the given rdf types.
 *
 * @param array $rdfTypes Types to look up search document specifications for
 * @param string $resource
 * @param string $context
 * @return array Whatever generateSearchDocumentBasedOnSpecId() returned for each matching spec, in order
 * @throws \Exception When $resource or $context is empty
 */
public function generateSearchDocumentsBasedOnRdfTypes(array $rdfTypes, $resource, $context)
{
    if (empty($resource)) {
        throw new \Exception("Resource must be specified");
    }
    if (empty($context)) {
        throw new \Exception("Context must be specified");
    }

    // the run is timed, but the measurement is not currently logged or returned
    $stopwatch = new \Tripod\Timer();
    $stopwatch->start();

    $documents = [];
    foreach ($rdfTypes as $type) {
        $specsForType = Config::getInstance()->getSearchDocumentSpecifications($this->storeName, $type);

        // nothing to generate when no spec is registered for this type
        if (!empty($specsForType)) {
            foreach ($specsForType as $specForType) {
                $documents[] = $this->generateSearchDocumentBasedOnSpecId($specForType['_id'], $resource, $context);
            }
        }
    }

    $stopwatch->stop();

    return $documents;
}
/**
 * Generates search documents of type $searchDocumentType.
 *
 * With a $resourceUri, only that resource's search document is generated and
 * indexed, and the method returns immediately. Without one, every CBD document
 * whose rdf:type matches the specification is processed: handed to an
 * ApplyOperation job on $queueName when a queue is given, or generated and
 * indexed inline otherwise.
 *
 * @param string $searchDocumentType
 * @param string|null $resourceUri
 * @param string|null $context
 * @param string|null $queueName Queue for background bulk generation
 */
public function generateSearchDocuments($searchDocumentType, $resourceUri = null, $context = null, $queueName = null)
{
    $t = new \Tripod\Timer();
    $t->start();

    // default the context
    $contextAlias = $this->getContextAlias($context);
    $spec = \Tripod\Mongo\Config::getInstance()->getSearchDocumentSpecification($this->getStoreName(), $searchDocumentType);

    // default collection: resolved once, so the single-resource and bulk paths agree
    // and a spec without "from" no longer triggers an undefined-index read
    $from = isset($spec["from"]) ? $spec["from"] : $this->podName;

    if ($resourceUri) {
        $this->generateAndIndexSearchDocuments($resourceUri, $contextAlias, $from, $searchDocumentType);
        return;
    }

    // match either the qname or the full-URI alias of every declared type
    $declaredTypes = is_array($spec["type"]) ? $spec["type"] : array($spec["type"]);
    $types = array();
    foreach ($declaredTypes as $type) {
        $types[] = array("rdf:type.u" => $this->labeller->qname_to_alias($type));
        $types[] = array("rdf:type.u" => $this->labeller->uri_to_alias($type));
    }

    // No per-resource _id clause is needed here: the single-resource case returned above.
    $filter = array('$or' => $types);

    $docs = $this->config->getCollectionForCBD($this->getStoreName(), $from)->find($filter, array(
        'maxTimeMS' => $this->config->getMongoCursorTimeout()
    ));

    foreach ($docs as $doc) {
        if ($queueName) {
            $subject = new ImpactedSubject($doc['_id'], OP_SEARCH, $this->storeName, $from, array($searchDocumentType));

            $jobOptions = array();
            if ($this->stat || !empty($this->statsConfig)) {
                $jobOptions['statsConfig'] = $this->getStatsConfig();
            }

            $this->getApplyOperation()->createJob(array($subject), $queueName, $jobOptions);
        } else {
            $this->generateAndIndexSearchDocuments(
                $doc[_ID_KEY][_ID_RESOURCE],
                $doc[_ID_KEY][_ID_CONTEXT],
                $from,
                $searchDocumentType
            );
        }
    }

    $t->stop();
    // log under the search-document category (was MONGO_CREATE_TABLE), matching the stat key below
    $this->timingLog(MONGO_CREATE_SEARCH_DOC, array(
        'type' => $spec['type'],
        'duration' => $t->result(),
        'filter' => $filter,
        'from' => $from
    ));
    $this->getStat()->timer(MONGO_CREATE_SEARCH_DOC . ".{$searchDocumentType}", $t->result());
}
/**
 * Processes each subject synchronously.
 *
 * For every synchronous operation, asks the matching composite which subjects are
 * impacted by the given changes and updates each of them in turn, logging and
 * recording the time taken per subject.
 *
 * @param array $subjectsAndPredicatesOfChange
 * @param string $contextAlias
 */
protected function processSyncOperations(array $subjectsAndPredicatesOfChange, $contextAlias)
{
    foreach ($this->getSyncOperations() as $op) {
        /** @var \Tripod\Mongo\Composites\IComposite $composite */
        $composite = $this->tripod->getComposite($op);
        $opSubjects = $composite->getImpactedSubjects($subjectsAndPredicatesOfChange, $contextAlias);
        if (empty($opSubjects)) {
            continue;
        }

        foreach ($opSubjects as $subject) {
            /* @var $subject ImpactedSubject */
            $t = new \Tripod\Timer();
            $t->start();

            // Call update on the subject, rather than the composite directly, in case the change was to
            // another pod. update() is invoked without arguments, as the queue job does; the subject
            // was previously (and needlessly) passed to its own update() call.
            $subject->update();

            $t->stop();

            $this->timingLog(MONGO_ON_THE_FLY_MR, array(
                "duration" => $t->result(),
                "storeName" => $subject->getStoreName(),
                "podName" => $subject->getPodName(),
                "resourceId" => $subject->getResourceId()
            ));
            $this->getStat()->timer(MONGO_ON_THE_FLY_MR, $t->result());
        }
    }
}
/**
 * Searches the search documents of $type for documents whose indices contain
 * every term of $q.
 *
 * The query is lower-cased and split on spaces; stop words are dropped unless that
 * would leave no terms at all. Each term is matched literally (regex metacharacters
 * in user input are escaped) against the given indices.
 *
 * @param string $q Free-text query
 * @param string $type Search document type to restrict the query to
 * @param array $indices Index fields to search; several indices are OR-ed together
 * @param array $fields Fields to return; with a single field each result is that field's value, otherwise an array keyed by field
 * @param int $limit
 * @param int $offset
 * @return array|mixed With keys "head" (count, limit, offset, duration, query, query_terms_used) and "results"
 * @throws \Tripod\Exceptions\SearchException When an argument is missing or invalid, or the query contains no terms
 */
public function search($q, $type, $indices = array(), $fields = array(), $limit = 10, $offset = 0)
{
    if (empty($q)) {
        throw new \Tripod\Exceptions\SearchException("You must specify a query");
    }
    if (empty($type)) {
        throw new \Tripod\Exceptions\SearchException("You must specify the search document type to restrict the query to");
    }
    if (empty($indices)) {
        throw new \Tripod\Exceptions\SearchException("You must specify at least one index from the search document specification to query against");
    }
    if (empty($fields)) {
        throw new \Tripod\Exceptions\SearchException("You must specify at least one field from the search document specification to return");
    }
    if (!is_numeric($limit) || $limit < 0) {
        throw new \Tripod\Exceptions\SearchException("Value for limit must be a positive number");
    }
    if (!is_numeric($offset) || $offset < 0) {
        throw new \Tripod\Exceptions\SearchException("Value for offset must be a positive number");
    }

    // Repeated spaces would otherwise yield empty terms, and an empty regex matches every document
    $original_terms = array_values(array_filter(
        explode(" ", trim(strtolower($q))),
        function ($term) {
            return $term !== '';
        }
    ));
    if (empty($original_terms)) {
        // the query consisted of whitespace only
        throw new \Tripod\Exceptions\SearchException("You must specify a query");
    }

    $terms = array_values(array_diff($original_terms, $this->stopWords));
    // todo: this means if all the words entered were stop words, then use the original terms rather than do nothing!
    if (empty($terms)) {
        $terms = $original_terms;
    }

    $regexes = array();
    foreach ($terms as $term) {
        // terms are user input: match them literally instead of interpreting them as patterns
        $regexes[] = new Regex(preg_quote($term), '');
    }

    $query = array();
    $query['_id.type'] = $type;
    if (count($indices) == 1) {
        $searchIndex = $indices[0];
        $query[$searchIndex] = array('$all' => $regexes);
    } else {
        $query['$or'] = array();
        foreach ($indices as $searchIndex) {
            $query['$or'][] = array("{$searchIndex}" => array('$all' => $regexes));
        }
    }

    $fieldsToReturn = array();
    foreach ($fields as $field) {
        $fieldsToReturn[$field] = 1;
    }

    $searchTimer = new \Tripod\Timer();
    $searchTimer->start();

    $collection = $this->config->getCollectionForSearchDocument($this->storeName, $type);
    // limit/offset were validated as numeric above but may be numeric strings; the query options are cast to integers
    $cursor = $collection->find($query, array(
        'projection' => $fieldsToReturn,
        'limit' => (int) $limit,
        'skip' => (int) $offset
    ));

    $searchResults = array();
    $searchResults['head'] = array();
    $searchResults['head']['count'] = "";
    $searchResults['head']['limit'] = $limit;
    $searchResults['head']['offset'] = $offset;
    $searchResults['head']['duration'] = "";
    $searchResults['head']['query'] = $q;
    $searchResults['head']['query_terms_used'] = $terms;
    $searchResults['results'] = array();

    $count = $this->config->getCollectionForSearchDocument($this->storeName, $type)->count($query);
    if ($count > 0) {
        $searchResults['head']['count'] = $count;

        foreach ($cursor as $result) {
            // if more than one field has been asked for we need to
            // enumerate them in the results returned. However if only one has been
            // asked for then results is just set to that single fields value.
            if (count($fields) > 1) {
                $r = array();
                foreach ($fields as $field) {
                    if (isset($result[$field])) {
                        $r[$field] = $result[$field];
                    } else {
                        $r[$field] = "";
                    }
                }
                $searchResults['results'][] = $r;
            } else {
                $searchResults['results'][] = $result[$fields[0]];
            }
        }
    } else {
        $searchResults['head']['count'] = 0;
    }

    $searchTimer->stop();
    $searchResults['head']["duration"] = $searchTimer->result() . ' ms';

    return $searchResults;
}