/**
 * The term index mock reports matches for P7, P10, P15 and P12. Of the four
 * suggestions (P12, P23, P7, P15) and with a result size of 2, the expected
 * outcome is the P12 and P7 suggestions, in their original order.
 */
public function testFilterSuggestions() {
    $property7 = PropertyId::newFromNumber(7);
    $property10 = PropertyId::newFromNumber(10);
    $property12 = PropertyId::newFromNumber(12);
    $property15 = PropertyId::newFromNumber(15);
    $property23 = PropertyId::newFromNumber(23);

    $suggestion12 = new Suggestion($property12, 0.9);
    $suggestion23 = new Suggestion($property23, 0.8);
    $suggestion7 = new Suggestion($property7, 0.7);
    $suggestion15 = new Suggestion($property15, 0.6);

    // Entries the mocked term index hands back for any search.
    $matchingEntries = $this->getTermIndexEntryArrayWithIds(
        array($property7, $property10, $property15, $property12)
    );
    $this->termIndex->expects($this->any())
        ->method('getTopMatchingTerms')
        ->will($this->returnValue($matchingEntries));

    $filtered = $this->suggestionGenerator->filterSuggestions(
        array($suggestion12, $suggestion23, $suggestion7, $suggestion15),
        'foo',
        'en',
        2
    );

    $this->assertEquals(array($suggestion12, $suggestion7), $filtered);
}
/**
 * Fetches the top matching term index entries for the given text in the given
 * language and, when language fallback is enabled and fewer than $this->limit
 * entries were found, appends matches from the fallback languages.
 *
 * Matches in the requested language always come first. Fallback matches for an
 * entity that already has a match are dropped. The combined result is cut down
 * to at most $this->limit entries.
 *
 * @param string $text
 * @param string $languageCode
 * @param string $entityType
 * @param string[] $termTypes
 *
 * @return TermIndexEntry[]
 */
private function getMatchingTermIndexEntries($text, $languageCode, $entityType, array $termTypes) {
    $languageCodes = array($languageCode);

    $matchedTermIndexEntries = $this->termIndex->getTopMatchingTerms(
        $this->makeTermIndexEntryTemplates($text, $languageCodes, $termTypes),
        null,
        $entityType,
        $this->getTermIndexOptions()
    );

    // Shortcut out if we already have enough entries or fallback is disabled.
    if (count($matchedTermIndexEntries) >= $this->limit || !$this->useLanguageFallback) {
        return $matchedTermIndexEntries;
    }

    // Set of entity id serializations that already have a match, keyed by
    // serialization so the duplicate check below is a constant-time lookup.
    $matchedEntityIdSerializations = array();
    foreach ($matchedTermIndexEntries as $termIndexEntry) {
        $matchedEntityIdSerializations[$termIndexEntry->getEntityId()->getSerialization()] = true;
    }

    $fallbackMatchedTermIndexEntries = $this->termIndex->getTopMatchingTerms(
        $this->makeTermIndexEntryTemplates(
            $text,
            $this->addFallbackLanguageCodes($languageCodes),
            $termTypes
        ),
        null,
        $entityType,
        $this->getTermIndexOptions()
    );

    // Remove any fallback entries whose entity already has a match.
    foreach ($fallbackMatchedTermIndexEntries as $key => $termIndexEntry) {
        if (isset($matchedEntityIdSerializations[$termIndexEntry->getEntityId()->getSerialization()])) {
            unset($fallbackMatchedTermIndexEntries[$key]);
        }
    }

    // Matches in the main language will always be first.
    $matchedTermIndexEntries = array_merge($matchedTermIndexEntries, $fallbackMatchedTermIndexEntries);

    // array_slice() returns the truncated copy and leaves its argument
    // untouched, so the result has to be assigned for the limit to apply.
    if (count($matchedTermIndexEntries) > $this->limit) {
        $matchedTermIndexEntries = array_slice($matchedTermIndexEntries, 0, $this->limit);
    }

    return $matchedTermIndexEntries;
}
/**
 * Builds one result entry per suggestion.
 *
 * The terms of all suggested properties are fetched from the term index in a
 * single call for the given language, clustered via clusterTerms(), and then
 * passed to buildEntry() together with each suggestion.
 *
 * @param Suggestion[] $suggestions
 * @param string $language
 * @return array[]
 */
public function createResultArray(array $suggestions, $language) {
    $propertyIds = array();
    foreach ($suggestions as $suggestion) {
        $propertyIds[] = $suggestion->getPropertyId();
    }

    // See SearchEntities
    $clusteredTerms = $this->clusterTerms(
        $this->termIndex->getTermsOfEntities($propertyIds, null, array($language))
    );

    $resultEntries = array();
    foreach ($suggestions as $suggestion) {
        $resultEntries[] = $this->buildEntry($suggestion->getPropertyId(), $clusteredTerms, $suggestion);
    }

    return $resultEntries;
}
/**
 * Runs a case-insensitive prefix search over property labels and aliases in
 * the given language and returns the entity ids of the matching terms.
 *
 * @param string $search
 * @param string $language
 * @return PropertyId[]
 */
private function getMatchingIDs($search, $language) {
    $searchTemplate = new TermIndexEntry(array(
        'termLanguage' => $language,
        'termText' => $search
    ));
    $termTypes = array(TermIndexEntry::TYPE_LABEL, TermIndexEntry::TYPE_ALIAS);
    $searchOptions = array('caseSensitive' => false, 'prefixSearch' => true);

    $matches = $this->termIndex->getTopMatchingTerms(
        array($searchTemplate),
        $termTypes,
        Property::ENTITY_TYPE,
        $searchOptions
    );

    $propertyIds = array();
    foreach ($matches as $match) {
        $propertyIds[] = $match->getEntityId();
    }

    return $propertyIds;
}
/**
 * Collects the terms of the given entities from the term index.
 *
 * The ids are first partitioned by splitPageEntityMapByType(); the term index
 * is then queried once per resulting group and the results are concatenated.
 *
 * @param EntityId[] $entityIds
 * @param string[]|null $termTypes
 * @param string[]|null $languageCodes
 *
 * @return TermIndexEntry[]
 */
private function getTermsOfEntities(array $entityIds, array $termTypes = null, array $languageCodes = null) {
    $collectedTerms = array();

    foreach ($this->splitPageEntityMapByType($entityIds) as $idGroup) {
        $groupTerms = $this->termIndex->getTermsOfEntities($idGroup, $termTypes, $languageCodes);
        $collectedTerms = array_merge($collectedTerms, $groupTerms);
    }

    return $collectedTerms;
}
/**
 * Loads the labels of all properties in $this->languageCode from the term
 * index and maps each label text to the entity id of its term.
 *
 * When several terms share the same label text, the last one returned by the
 * term index wins.
 *
 * @return array Map of label text to the entity id returned by the term.
 */
protected function loadProperties() {
    $labelTemplate = new TermIndexEntry(array(
        'termType' => 'label',
        'termLanguage' => $this->languageCode,
        'entityType' => Property::ENTITY_TYPE
    ));
    $searchOptions = array(
        'caseSensitive' => true,
        'prefixSearch' => false,
        'LIMIT' => false
    );

    $labelTerms = $this->termIndex->getMatchingTerms(
        array($labelTemplate),
        'label',
        Property::ENTITY_TYPE,
        $searchOptions
    );

    $idsByLabel = array();
    foreach ($labelTerms as $labelTerm) {
        $idsByLabel[$labelTerm->getText()] = $labelTerm->getEntityId();
    }

    return $idsByLabel;
}
/**
 * Loads a set of terms into the buffer.
 * The source from which to fetch would typically be supplied to the buffer's constructor.
 *
 * Does nothing when no entity ids are given. When language codes are given,
 * setUndefinedTerms() is additionally called with the keys that were buffered.
 *
 * @param EntityId[] $entityIds
 * @param string[]|null $termTypes
 * @param string[]|null $languageCodes
 *
 * @throws StorageException
 */
public function prefetchTerms(array $entityIds, array $termTypes = null, array $languageCodes = null) {
    if (empty($entityIds)) {
        return;
    }

    // Checking the buffer first would be possible, but it is hard to tell which
    // entities are "fully covered" by the cached terms. The current use case
    // (the ChangesListInitRows hook) would generally trigger just one call to
    // prefetchTerms before any call to getTermsOfType() anyway.
    $fetchedTerms = array();
    foreach ($this->groupEntityIds($entityIds) as $idsOfOneType) {
        $fetchedTerms = array_merge(
            $fetchedTerms,
            $this->termIndex->getTermsOfEntities($idsOfOneType, $termTypes, $languageCodes)
        );
    }

    $bufferedKeys = $this->setBufferedTermObjects($fetchedTerms);

    if (empty($languageCodes)) {
        return;
    }

    $this->setUndefinedTerms($entityIds, $termTypes, $languageCodes, $bufferedKeys);
}
/**
 * Checks whether the given term index has at least one term matching the text.
 *
 * The language is only added to the search template when it is not null; term
 * type and entity type are passed through to getMatchingTerms() unchanged.
 *
 * @param TermIndex $termIndex
 * @param string $text
 * @param string|null $termType
 * @param string|null $language
 * @param string|null $entityType
 *
 * @return bool
 */
private function termExists(TermIndex $termIndex, $text, $termType = null, $language = null, $entityType = null) {
    $templateFields = array('termText' => $text);
    if ($language !== null) {
        $templateFields['termLanguage'] = $language;
    }

    $template = new TermIndexEntry($templateFields);
    $found = $termIndex->getMatchingTerms(array($template), $termType, $entityType);

    return !empty($found);
}
/**
 * Fetches the terms of a single type for one entity from the term index and
 * converts them with convertTermsToMap().
 *
 * @param EntityId $entityId
 * @param string $termType
 * @param string[]|null $languageCodes The languages to get terms for; null means all languages.
 *
 * @return string[]
 */
protected function getTermsOfType(EntityId $entityId, $termType, array $languageCodes = null) {
    return $this->convertTermsToMap(
        $this->termIndex->getTermsOfEntity($entityId, array($termType), $languageCodes)
    );
}