/**
 * Builds the Solr document that represents one Omeka item.
 *
 * Besides the identifying fields, the document receives the Dublin Core
 * title, whatever self::indexItem() adds for the item, one `tag` value per
 * tag, the collection title, the item type name, the featured flag and
 * whatever self::indexItem() adds for each file attached to the item.
 *
 * @param Omeka_Record $item The record to index.
 *
 * @return Apache_Solr_Document
 * @author Eric Rochester <*****@*****.**>
 **/
public static function itemToDocument($item)
{
    $searchFields = get_db()->getTable('SolrSearchField');
    $solrDoc = new Apache_Solr_Document();

    // Identification of the record inside the index.
    $solrDoc->setField('id', 'Item_' . $item->id);
    $solrDoc->setField('resulttype', 'Item');
    $solrDoc->setField('model', 'Item');
    $solrDoc->setField('modelid', $item->id);

    // Dublin Core title.
    $solrDoc->setField('title', metadata($item, array('Dublin Core', 'Title')));

    // Element texts of the item itself.
    self::indexItem($searchFields, $item, $solrDoc);

    // One multi-valued entry per tag.
    foreach ($item->getTags() as $itemTag) {
        $solrDoc->setMultiValue('tag', $itemTag->name);
    }

    // Collection title, only when the item belongs to a collection.
    $collection = $item->getCollection();
    if ($collection) {
        $solrDoc->collection = metadata($collection, array('Dublin Core', 'Title'));
    }

    // Item type name, only when the item has a type.
    $itemType = $item->getItemType();
    if ($itemType) {
        $solrDoc->itemtype = $itemType->name;
    }

    $solrDoc->featured = (bool) $item->featured;

    // Metadata of the attached files goes into the same document.
    foreach ($item->getFiles() as $attachedFile) {
        self::indexItem($searchFields, $attachedFile, $solrDoc);
    }

    return $solrDoc;
}
/**
 * Creates a minimal Solr document for tests.
 *
 * @param mixed $id Value stored in the `id` field and appended to the title.
 *
 * @return Apache_Solr_Document Document with `id` and `title` ("Item <id>") set.
 */
protected function getMockDocument($id)
{
    $mock = new Apache_Solr_Document();

    $mock->setField('id', $id);
    $mock->setField('title', 'Item ' . $id);

    return $mock;
}
/**
 * Builds a Solr document for a database object and hands it to the service.
 *
 * @param DataObject $object  The object to index.
 * @param string     $base    Base class; only configured fields whose 'base'
 *                            equals this value are added.
 * @param array      $options Must contain the key 'include_children', which
 *                            is passed on to getDocumentID().
 *
 * @return Apache_Solr_Document The document that was passed to addDocument().
 */
protected function _addAs($object, $base, $options)
{
    $withChildren = $options['include_children'];

    $solrDoc = new Apache_Solr_Document();

    // Fields every document carries.
    $solrDoc->setField('_documentid', $this->getDocumentID($object, $base, $withChildren));
    $solrDoc->setField('ID', $object->ID);
    $solrDoc->setField('ClassName', $object->ClassName);

    $ancestry = SearchIntrospection::hierarchy(get_class($object), false);
    foreach ($ancestry as $ancestorClass) {
        $solrDoc->addField('ClassHierarchy', $ancestorClass);
    }

    // User-specified fields that belong to the requested base class.
    foreach ($this->getFieldsIterator() as $fieldName => $fieldSpec) {
        if ($fieldSpec['base'] != $base) {
            continue;
        }
        $this->_addField($solrDoc, $object, $fieldSpec);
    }

    // CUSTOM: duplicate combined fields under their plain names ("Title"
    // rather than "SiteTree_Title") so they can be sorted on without deeper
    // architectural changes to the fulltextsearch module. <copyField> is not
    // usable for this because it only writes into multiValue=true fields,
    // and those cannot be (reliably) sorted on.
    $titleSpec = $this->getCustomPropertyFieldData('Title', $object);
    $this->_addField($solrDoc, $object, $titleSpec);

    $lastEditedSpec = $this->getCustomPropertyFieldData('LastEdited', $object, 'SSDatetime');
    $this->_addField($solrDoc, $object, $lastEditedSpec);

    $this->getService()->addDocument($solrDoc);

    return $solrDoc;
}
/**
 * A single-valued field configured with 'timestampToIsoDate' must come back
 * from processDocument() as a single ISO date string.
 *
 * @test
 */
public function transformsUnixTimestampToIsoDateOnSingleValuedField()
{
    // 1262343600 is 2010-01-01 11:00:00 UTC.
    // NOTE(review): the expected "12:00:00Z" below therefore seems to rely
    // on a UTC+1 default timezone in the test environment — confirm.
    $this->documentMock->setField('dateField', '1262343600');

    $configuration = array('dateField' => 'timestampToIsoDate');
    $this->service->processDocument($this->documentMock, $configuration);

    $value = $this->documentMock->getField('dateField');

    // assertEquals() takes the expected value first and the actual value
    // second; with the arguments swapped, failure output is mislabeled.
    $this->assertEquals(
        '2010-01-01T12:00:00Z',
        $value['value'],
        'field was not processed with timestampToIsoDate'
    );
}
/**
 * Modifies a document according to the given processing configuration.
 *
 * Each configuration entry maps a field name to an instruction
 * ('timestampToIsoDate' or 'uppercase'). Fields that are not present in the
 * document are skipped. Unknown instructions leave the value unchanged.
 *
 * The boost reported by getField() is passed back to setField(), so that
 * processing a field does not reset its boost.
 *
 * @param Apache_Solr_Document $document
 * @param array $processingConfiguration field name => instruction
 */
public function processDocument(Apache_Solr_Document $document, array $processingConfiguration)
{
    foreach ($processingConfiguration as $fieldName => $instruction) {
        $fieldInformation = $document->getField($fieldName);

        if ($fieldInformation === FALSE) {
            // field is not part of the document, nothing to process
            continue;
        }

        $fieldValue = $fieldInformation['value'];

        // processors work on arrays, so single values are wrapped and
        // unwrapped again after processing
        $isSingleValueField = !is_array($fieldValue);
        if ($isSingleValueField) {
            $fieldValue = array($fieldValue);
        }

        switch ($instruction) {
            case 'timestampToIsoDate':
                $processor = t3lib_div::makeInstance('tx_solr_fieldprocessor_TimestampToIsoDate');
                $fieldValue = $processor->process($fieldValue);
                break;
            case 'uppercase':
                $fieldValue = array_map('strtoupper', $fieldValue);
                break;
        }

        if ($isSingleValueField) {
            // turn multi value field back into single value field
            $fieldValue = $fieldValue[0];
        }

        // setField() without a boost argument would drop the field's boost
        $fieldBoost = isset($fieldInformation['boost']) ? $fieldInformation['boost'] : FALSE;
        $document->setField($fieldName, $fieldValue, $fieldBoost);
    }
}
/**
 * Adds fields to the document as defined in $indexingConfiguration.
 *
 * Entries whose configuration value is an array (content object
 * configuration) are skipped. Array results are appended value by value,
 * scalar results are set unless they are '' or null.
 *
 * @param \Apache_Solr_Document $document base document to add fields to
 * @param array $indexingConfiguration Indexing configuration / mapping
 * @param array $data Record data
 * @return \Apache_Solr_Document Modified document with added fields
 * @throws InvalidFieldNameException when the configuration targets a field
 *                                   that isAllowedToOverrideField() rejects
 */
protected function addDocumentFieldsFromTyposcript(\Apache_Solr_Document $document, array $indexingConfiguration, array $data)
{
    // mapping of record fields => solr document fields, resolving cObj
    foreach ($indexingConfiguration as $solrFieldName => $recordFieldName) {
        if (is_array($recordFieldName)) {
            // configuration for a content object, skipping
            continue;
        }

        if (!self::isAllowedToOverrideField($solrFieldName)) {
            // The original message had a misplaced period in front of the
            // field name ("field .title"); the name is quoted instead.
            throw new InvalidFieldNameException(
                'Must not overwrite field "' . $solrFieldName . '"',
                1435441863
            );
        }

        $fieldValue = $this->resolveFieldValue($indexingConfiguration, $solrFieldName, $data);

        if (is_array($fieldValue)) {
            // multi value
            foreach ($fieldValue as $multiValue) {
                $document->addField($solrFieldName, $multiValue);
            }
        } else {
            // empty scalar values are not written to the document
            if ($fieldValue !== '' && $fieldValue !== null) {
                $document->setField($solrFieldName, $fieldValue);
            }
        }
    }

    return $document;
}
/**
 * Converts an array description into an Apache_Solr_Document that can be
 * passed to Apache_Solr_Service.
 *
 * @param array $document field name => array with a 'value' entry and an
 *                        optional 'boost' entry
 *
 * @return Apache_Solr_Document
 **/
public function buildDocument(array $document)
{
    $solrDocument = new Apache_Solr_Document();

    foreach ($document as $name => $definition) {
        $raw = $definition['value'];

        // Values are always handed over as arrays; scalars get wrapped.
        $values = is_array($raw) ? $raw : array($raw);

        if (!isset($definition['boost'])) {
            $solrDocument->setField($name, $values);
            continue;
        }

        $solrDocument->setField($name, $values, $definition['boost']);
    }

    return $solrDocument;
}
/**
 * Adding a boosted value to a field that already has a boost must leave the
 * field with the product of both boosts.
 */
public function testAddFieldWithBoostMultipliesWithAPreexistingBoost()
{
    $fieldName = 'field';
    $factor = 0.5;

    // First value establishes the boost ...
    $this->_fixture->setField($fieldName, 'value1', $factor);

    // ... the second value is added with the same boost.
    $this->_fixture->addField($fieldName, 'value2', $factor);

    $expectedBoost = $factor * $factor;
    $actualBoost = $this->_fixture->getFieldBoost($fieldName);

    $this->assertEquals($expectedBoost, $actualBoost);
}
/**
 * Builds the Solr document for one file from the index queue.
 *
 * @param integer $storeId Store ID the file belongs to/where it is linked on a page
 * @param Asm_Solr_Model_Indexqueue_File $file The file to index
 * @return Apache_Solr_Document
 */
protected function buildFileDocument($storeId, Asm_Solr_Model_Indexqueue_File $file)
{
    $solrHelper = Mage::helper('solr');
    $siteHost = parse_url(Mage::getBaseUrl(Mage_Core_Model_Store::URL_TYPE_WEB), PHP_URL_HOST);

    // Field name => value, in the order the fields are written.
    $fieldValues = array(
        'appKey'    => 'Asm_Solr',
        'type'      => 'solr/indexqueue_file',
        'id'        => $solrHelper->getFileDocumentId($file->getId()),
        'site'      => $siteHost,
        'siteHash'  => $solrHelper->getSiteHashForDomain($siteHost),
        'storeId'   => $storeId,
        'changed'   => $solrHelper->dateToIso($file->getFileLastChangedTime()),
        'productId' => 0,
        'sku'       => 'solr/indexqueue_file',
        'title'     => $file->getName(),
        'content'   => $file->getContent(),
        'url'       => $file->getUrl(),
    );

    $document = new Apache_Solr_Document();
    foreach ($fieldValues as $fieldName => $fieldValue) {
        $document->setField($fieldName, $fieldValue);
    }

    return $document;
}
/**
 * Copies the fields described by $indexingConfiguration into the document.
 *
 * Configuration entries whose value is an array describe a content object
 * and are skipped. Resolved array values are appended one by one, anything
 * else is set as a single value.
 *
 * @param Apache_Solr_Document $document base document to add fields to
 * @param array $indexingConfiguration Indexing configuration / mapping
 * @param array $data Record data
 * @return Apache_Solr_Document Modified document with added fields
 */
protected function addDocumentFieldsFromTyposcript(Apache_Solr_Document $document, array $indexingConfiguration, array $data)
{
    foreach ($indexingConfiguration as $solrFieldName => $recordFieldName) {
        // content object configuration, not a field mapping
        if (is_array($recordFieldName)) {
            continue;
        }

        $resolved = $this->resolveFieldValue($indexingConfiguration, $solrFieldName, $data);

        if (!is_array($resolved)) {
            $document->setField($solrFieldName, $resolved);
            continue;
        }

        // multi value
        foreach ($resolved as $singleValue) {
            $document->addField($solrFieldName, $singleValue);
        }
    }

    return $document;
}
/**
 * Builds the Solr document for a contact and stores it through
 * OBM_IndexingService::store('contact', ...).
 *
 * NOTE(review): the 'is' flag is written with setField() for every flag that
 * is set, so presumably only the last matching flag is kept when a contact
 * has several — confirm whether 'is' is meant to be multi-valued.
 *
 * @param object $contact Contact to index; the properties and the
 *                        hasACalendarUrl() method used below must exist.
 */
public function solrStore($contact)
{
    $isoFormat = 'Y-m-d\TH:i:s\Z';
    $doc = new Apache_Solr_Document();

    // Bookkeeping fields.
    $doc->setField('id', $contact->id);
    $doc->setField('timecreate', $contact->timecreate->format($isoFormat));
    $doc->setField('timeupdate', $contact->timeupdate->format($isoFormat));
    $doc->setField('usercreate', $contact->usercreate);
    $doc->setField('userupdate', $contact->userupdate);
    $doc->setField('datasource', $contact->datasource_id);
    $doc->setField('domain', $GLOBALS['obm']['domain_id']);

    // Solr field => contact property, written in this order.
    $identity = array(
        'in'            => 'addressbook',
        'addressbookId' => 'addressbook_id',
        'company'       => 'company',
        'companyId'     => 'company_id',
        'commonname'    => 'commonname',
        'lastname'      => 'lastname',
        'firstname'     => 'firstname',
        'middlename'    => 'mname',
    );
    foreach ($identity as $solrField => $property) {
        $doc->setField($solrField, $contact->$property);
    }

    $doc->setField('sortable', $contact->lastname . " " . $contact->firstname);

    $relations = array(
        'suffix'        => 'suffix',
        'aka'           => 'aka',
        'kind'          => 'kind',
        'manager'       => 'manager',
        'assistant'     => 'assistant',
        'spouse'        => 'spouse',
        'birthdayId'    => 'birthday_event',
        'anniversaryId' => 'anniversary_event',
    );
    foreach ($relations as $solrField => $property) {
        $doc->setField($solrField, $contact->$property);
    }

    // Optional dates are only indexed when present.
    foreach (array('birthday', 'anniversary') as $dateProperty) {
        if ($contact->$dateProperty) {
            $doc->setField($dateProperty, $contact->$dateProperty->format($isoFormat));
        }
    }

    $doc->setField('category', $contact->category);
    foreach ($contact->categories as $categoryGroup) {
        foreach ($categoryGroup as $entry) {
            $doc->setMultiValue('categoryId', $entry['id']);
        }
    }

    foreach (array('service', 'function', 'title') as $property) {
        $doc->setField($property, $contact->$property);
    }

    // Contact property => value written to the 'is' field.
    $flags = array(
        'archive'    => 'archive',
        'collected'  => 'collected',
        'mailok'     => 'mailing',
        'newsletter' => 'newsletter',
    );
    foreach ($flags as $property => $label) {
        if ($contact->$property) {
            $doc->setField('is', $label);
        }
    }

    if ($contact->date) {
        $doc->setField('date', $contact->date->format($isoFormat));
    }

    $doc->setField('comment', $contact->comment);
    $doc->setField('comment2', $contact->comment2);
    $doc->setField('comment3', $contact->comment3);
    $doc->setField('from', $contact->origin);

    // Contact property => array(Solr field, key read from each entry).
    $contactPoints = array(
        'email' => array('email', 'address'),
        'phone' => array('phone', 'number'),
        'im'    => array('jabber', 'address'),
    );
    foreach ($contactPoints as $property => $target) {
        foreach ($contact->$property as $entry) {
            $doc->setMultiValue($target[0], $entry[$target[1]]);
        }
    }

    $addressParts = array('street', 'zipcode', 'expresspostal', 'town', 'country');
    foreach ($contact->address as $address) {
        foreach ($addressParts as $part) {
            $doc->setMultiValue($part, $address[$part]);
        }
    }

    $doc->setField('hasACalendar', $contact->hasACalendarUrl() ? "true" : "false");

    OBM_IndexingService::store('contact', array($doc));
}
/**
 * Modifies a document according to the given processing configuration.
 *
 * Each configuration entry maps a field name to one instruction:
 * timestampToUtcIsoDate, timestampToIsoDate, pathToHierarchy,
 * pageUidToHierarchy, categoryUidToHierarchy or uppercase. Fields missing
 * from the document are skipped. Unknown instructions leave the value
 * unchanged.
 *
 * The boost reported by getField() is passed back to setField(), so that
 * processing a field does not reset its boost.
 *
 * @param \Apache_Solr_Document $document
 * @param array $processingConfiguration field name => instruction
 */
public function processDocument(\Apache_Solr_Document $document, array $processingConfiguration)
{
    foreach ($processingConfiguration as $fieldName => $instruction) {
        $fieldInformation = $document->getField($fieldName);

        if ($fieldInformation === false) {
            // field is not part of the document, nothing to process
            continue;
        }

        $fieldValue = $fieldInformation['value'];

        // processors work on arrays, so single values are wrapped and
        // unwrapped again after processing
        $isSingleValueField = !is_array($fieldValue);
        if ($isSingleValueField) {
            $fieldValue = array($fieldValue);
        }

        switch ($instruction) {
            case 'timestampToUtcIsoDate':
                $processor = GeneralUtility::makeInstance('ApacheSolrForTypo3\\Solr\\FieldProcessor\\TimestampToUtcIsoDate');
                $fieldValue = $processor->process($fieldValue);
                break;
            case 'timestampToIsoDate':
                $processor = GeneralUtility::makeInstance('ApacheSolrForTypo3\\Solr\\FieldProcessor\\TimestampToIsoDate');
                $fieldValue = $processor->process($fieldValue);
                break;
            case 'pathToHierarchy':
                $processor = GeneralUtility::makeInstance('ApacheSolrForTypo3\\Solr\\FieldProcessor\\PathToHierarchy');
                $fieldValue = $processor->process($fieldValue);
                break;
            case 'pageUidToHierarchy':
                $processor = GeneralUtility::makeInstance('ApacheSolrForTypo3\\Solr\\FieldProcessor\\PageUidToHierarchy');
                $fieldValue = $processor->process($fieldValue);
                break;
            case 'categoryUidToHierarchy':
                $processor = GeneralUtility::makeInstance('ApacheSolrForTypo3\\Solr\\FieldProcessor\\CategoryUidToHierarchy');
                $fieldValue = $processor->process($fieldValue);
                break;
            case 'uppercase':
                $fieldValue = array_map('strtoupper', $fieldValue);
                break;
        }

        if ($isSingleValueField) {
            // turn multi value field back into single value field
            $fieldValue = $fieldValue[0];
        }

        // setField() without a boost argument would drop the field's boost
        $fieldBoost = isset($fieldInformation['boost']) ? $fieldInformation['boost'] : false;
        $document->setField($fieldName, $fieldValue, $fieldBoost);
    }
}
/**
 * Builds the Solr document for one CMS page.
 *
 * @param integer $storeId Store ID
 * @param Mage_Cms_Model_Page $page Page instance
 * @return Apache_Solr_Document
 */
protected function buildPageDocument($storeId, $page)
{
    $solrHelper = Mage::helper('solr');
    $siteHost = parse_url(Mage::getBaseUrl(Mage_Core_Model_Store::URL_TYPE_WEB), PHP_URL_HOST);

    // Field name => value, in the order the fields are written.
    $fieldValues = array(
        'appKey'    => 'Asm_Solr',
        'type'      => 'cms/page',
        'id'        => $solrHelper->getPageDocumentId($page->getId()),
        'site'      => $siteHost,
        'siteHash'  => $solrHelper->getSiteHashForDomain($siteHost),
        'storeId'   => $storeId,
        'created'   => $solrHelper->dateToIso($page->getCreationTime()),
        'changed'   => $solrHelper->dateToIso($page->getUpdateTime()),
        'sku'       => 'cms/page',
        'productId' => 0,
        'pageId'    => $page->getId(),
        'title'     => $page->getTitle(),
        'content'   => Mage::helper('solr/contentExtractor')->getIndexableContent($page->getContent()),
        'keywords'  => $solrHelper->trimExplode(',', $page->getMetaKeywords(), true),
        'url'       => Mage::helper('cms/page')->getPageUrl($page->getId()),
    );

    $document = new Apache_Solr_Document();
    foreach ($fieldValues as $fieldName => $fieldValue) {
        $document->setField($fieldName, $fieldValue);
    }

    return $document;
}
/**
 * Indexes the full text of one physical unit (page) in Solr.
 *
 * Units without a file in the configured full text file group are not
 * indexed and count as success.
 *
 * @access protected
 *
 * @param tx_dlf_document &$doc: The METS document
 * @param integer $page: The page number
 * @param array $physicalUnit: Array of the physical unit to process
 *
 * @return integer 0 on success or 1 on failure
 */
protected static function processPhysical(tx_dlf_document &$doc, $page, array $physicalUnit)
{
    // Read extension configuration.
    $conf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);

    // Nothing to index when the unit has no full text file.
    if (empty($physicalUnit['files'][$conf['fileGrpFulltext']])) {
        return 0;
    }

    $location = $doc->getFileLocation($physicalUnit['files'][$conf['fileGrpFulltext']]);

    $mayLoad = \TYPO3\CMS\Core\Utility\GeneralUtility::isValidUrl($location)
        || version_compare(phpversion(), '5.3.3', '<');
    if (!$mayLoad) {
        return 1;
    }

    // Set user-agent to identify self when fetching XML data.
    if (!empty($conf['useragent'])) {
        @ini_set('user_agent', $conf['useragent']);
    }

    // Silence libxml and disable entity loading while parsing; both
    // settings are restored to their previous values afterwards.
    $previousErrorSetting = libxml_use_internal_errors(TRUE);
    $previousEntityLoader = libxml_disable_entity_loader(TRUE);

    $fulltextXml = simplexml_load_string(file_get_contents($location));

    libxml_disable_entity_loader($previousEntityLoader);
    libxml_use_internal_errors($previousErrorSetting);

    if ($fulltextXml === FALSE) {
        return 1;
    }

    // Load the Solr document class on demand.
    if (!class_exists('Apache_Solr_Document')) {
        require_once \TYPO3\CMS\Core\Utility\GeneralUtility::getFileAbsFileName('EXT:' . self::$extKey . '/lib/SolrPhpClient/Apache/Solr/Document.php');
    }

    $unitId = $physicalUnit['id'];

    $solrDoc = new Apache_Solr_Document();

    // Unique identifier built from the document's UID and the unit's XML ID.
    $solrDoc->setField('id', $doc->uid . $unitId);
    $solrDoc->setField('uid', $doc->uid);
    $solrDoc->setField('pid', $doc->pid);
    $solrDoc->setField('page', $page);

    if (!empty($physicalUnit['files'][$conf['fileGrpThumbs']])) {
        $solrDoc->setField('thumbnail', $doc->getFileLocation($physicalUnit['files'][$conf['fileGrpThumbs']]));
    }

    $solrDoc->setField('partof', $doc->parentId);
    $solrDoc->setField('root', $doc->rootId);
    $solrDoc->setField('sid', $unitId);
    $solrDoc->setField('toplevel', FALSE);
    $solrDoc->setField('type', $physicalUnit['type'], self::$fields['fieldboost']['type']);
    $solrDoc->setField('fulltext', tx_dlf_alto::getRawText($fulltextXml));

    try {
        self::$solr->service->addDocument($solrDoc);
    } catch (Exception $e) {
        // Outside CLI mode the failure is reported as a flash message.
        if (!defined('TYPO3_cliMode')) {
            $message = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance(
                'TYPO3\\CMS\\Core\\Messaging\\FlashMessage',
                tx_dlf_helper::getLL('flash.solrException', TRUE) . '<br />' . htmlspecialchars($e->getMessage()),
                tx_dlf_helper::getLL('flash.error', TRUE),
                \TYPO3\CMS\Core\Messaging\FlashMessage::ERROR,
                TRUE
            );
            tx_dlf_helper::addMessage($message);
        }

        return 1;
    }

    return 0;
}
/**
 * Queues one or more Solr documents.
 *
 * Accepts either a single Apache_Solr_Document or an array whose elements
 * are Apache_Solr_Document instances or field lists (field name => value).
 * For field lists a new document is created; array values are added as
 * multi values, everything else as a single value.
 *
 * Invalid input triggers an error via trigger_error() and is not queued.
 *
 * @param Apache_Solr_Document|array $parts
 *
 * @return SP_Controller_Action_Helper_Solr
 */
public function pushDocuments($parts)
{
    $this->_setSolrService();

    if ($parts instanceof Apache_Solr_Document) {
        $this->documents[] = $parts;

        return $this;
    }

    if (!is_array($parts)) {
        trigger_error("the parameter \$parts must be an object of Apache_Solr_Document or an array");

        return $this;
    }

    foreach ($parts as $fields) {
        if ($fields instanceof Apache_Solr_Document) {
            $this->documents[] = $fields;
            continue;
        }

        // Scalars and null cannot be iterated; previously they caused a
        // foreach warning and an empty document was queued.
        if (!is_array($fields) && !is_object($fields)) {
            trigger_error("each element of \$parts must be an object of Apache_Solr_Document or an array of fields");
            continue;
        }

        $part = new Apache_Solr_Document();
        foreach ($fields as $key => $value) {
            if (is_array($value)) {
                foreach ($value as $datum) {
                    $part->setMultiValue($key, $datum);
                }
            } else {
                $part->setField($key, $value);
            }
        }

        $this->documents[] = $part;
    }

    return $this;
}
<?php

// Unit test script: checks what Apache_Solr_Document::getField() returns for
// a multi-valued field and for a single-valued field with a boost.

require dirname(__FILE__) . '/../../bootstrap/unit.php';

// Two checks are planned.
$t = new limeade_test(2, limeade_output::get());

$t->diag('document format a document');

$doc = new Apache_Solr_Document();
$doc->setBoost(10);
$doc->setField('sfl_guid', 'GUID_1234');
$doc->setField('name', 'Thomas Rabaix', 1);

// 'skills' is filled through both multi-value entry points.
$doc->setMultiValue('skills', 'php');
$doc->setMultiValue('skills', 'symfony');
$doc->addField('skills', 'objective-c');

$expectedSkills = array(
    'name'  => 'skills',
    'value' => array(0 => 'php', 1 => 'symfony', 2 => 'objective-c'),
    'boost' => false,
);
$t->cmp_ok($doc->getField('skills'), '==', $expectedSkills, '::getField test multivalue setter');

$expectedName = array(
    'name'  => 'name',
    'value' => 'Thomas Rabaix',
    'boost' => 1,
);
$t->cmp_ok($doc->getField('name'), '==', $expectedName, '::getField test setter');
/**
 * Indexes a logical unit in Solr and then recurses into its children.
 *
 * A unit without metadata is not indexed itself, but its children are still
 * processed. Processing of children stops at the first child that fails.
 *
 * @access protected
 *
 * @param tx_dlf_document &$doc: The METS document
 * @param array $logicalUnit: Array of the logical unit to process
 *
 * @return integer 0 on success or 1 on failure
 */
protected static function process(tx_dlf_document &$doc, array $logicalUnit)
{
    $errors = 0;

    // Get metadata for logical unit.
    $metadata = $doc->metadataArray[$logicalUnit['id']];

    if (!empty($metadata)) {
        // Load the Solr document class on demand.
        if (!class_exists('Apache_Solr_Document')) {
            require_once t3lib_div::getFileAbsFileName('EXT:' . self::$extKey . '/lib/SolrPhpClient/Apache/Solr/Document.php');
        }

        $boosts = self::$fields['fieldboost'];

        $solrDoc = new Apache_Solr_Document();

        // Unique identifier built from the document's UID and the unit's XML ID.
        $solrDoc->setField('id', $doc->uid . $logicalUnit['id']);
        $solrDoc->setField('uid', $doc->uid);
        $solrDoc->setField('pid', $doc->pid);

        if (tx_dlf_helper::testInt($logicalUnit['points'])) {
            $solrDoc->setField('page', $logicalUnit['points']);
        }

        // The toplevel unit uses the document's thumbnail, other units their own.
        if ($logicalUnit['id'] == $doc->toplevelId) {
            $solrDoc->setField('thumbnail', $doc->thumbnail);
        } elseif (!empty($logicalUnit['thumbnailId'])) {
            $solrDoc->setField('thumbnail', $doc->getFileLocation($logicalUnit['thumbnailId']));
        }

        $solrDoc->setField('partof', $doc->parentId);
        $solrDoc->setField('sid', $logicalUnit['id']);
        $solrDoc->setField('toplevel', in_array($logicalUnit['type'], self::$toplevel));
        $solrDoc->setField('type', $logicalUnit['type'], $boosts['type']);
        $solrDoc->setField('title', $metadata['title'][0], $boosts['title']);
        $solrDoc->setField('volume', $metadata['volume'][0], $boosts['volume']);

        $autocompleteValues = array();

        foreach ($metadata as $indexName => $values) {
            // Skip empty entries and the "*_sorting" companions, which are
            // written together with their base field below.
            if (empty($values) || substr($indexName, -8) === '_sorting') {
                continue;
            }

            $solrDoc->setField(self::getIndexFieldName($indexName, $doc->pid), $values, self::$fields['fieldboost'][$indexName]);

            // Add sortable fields to index.
            if (in_array($indexName, self::$fields['sortables'])) {
                $solrDoc->setField($indexName . '_sorting', $metadata[$indexName . '_sorting'][0]);
            }

            // Add facets to index.
            if (in_array($indexName, self::$fields['facets'])) {
                $solrDoc->setField($indexName . '_faceting', $values);
            }

            if (in_array($indexName, self::$fields['autocompleted'])) {
                $autocompleteValues = array_merge($autocompleteValues, $values);
            }
        }

        // Add autocomplete values to index.
        if (!empty($autocompleteValues)) {
            $solrDoc->setField('autocomplete', $autocompleteValues);
        }

        try {
            self::$solr->service->addDocument($solrDoc);
        } catch (Exception $e) {
            // Outside CLI mode the failure is reported as a flash message.
            if (!defined('TYPO3_cliMode')) {
                $message = t3lib_div::makeInstance(
                    't3lib_FlashMessage',
                    tx_dlf_helper::getLL('flash.solrException', TRUE) . '<br />' . htmlspecialchars($e->getMessage()),
                    tx_dlf_helper::getLL('flash.error', TRUE),
                    t3lib_FlashMessage::ERROR,
                    TRUE
                );
                t3lib_FlashMessageQueue::addMessage($message);
            }

            return 1;
        }
    }

    // Process child elements, stopping at the first failure.
    if (!empty($logicalUnit['children'])) {
        foreach ($logicalUnit['children'] as $childUnit) {
            $errors = self::process($doc, $childUnit);

            if ($errors) {
                break;
            }
        }
    }

    return $errors;
}
/**
 * Converts a data object (or a raw array of field data) into a Solr document.
 *
 * @param object|array $dataObject Object to index, or an array of field data.
 *                                 An array may carry 'ID' and 'index_fields'
 *                                 entries; without 'index_fields' every key
 *                                 of the array is indexed.
 * @param string|null  $stage      Stage to record in SS_Stage; when empty,
 *                                 both 'Stage' and 'Live' are recorded.
 * @param array        $fieldBoost Boosts keyed by "<solr field name>:<value>",
 *                                 applied to single-valued fields.
 *
 * @return Apache_Solr_Document|false|null The document; null when the object
 *         has ShowInSearch switched off; false when no ID could be determined.
 */
public function convertObjectToDocument($dataObject, $stage = null, $fieldBoost = array())
{
    $document = new Apache_Solr_Document();
    $fieldsToIndex = array();
    $object = null;

    // whether the original item is an object or array.
    // determines how we treat the object later when checking all the fields
    $sourceObject = true;
    $id = 0;

    if (is_object($dataObject)) {
        if ($dataObject->hasMethod('hasField') && $dataObject->hasField('ShowInSearch') && !$dataObject->ShowInSearch) {
            return;
        }
        $fieldsToIndex = $this->getSearchableFieldsFor($dataObject);
        $object = $this->objectToFields($dataObject);
        $id = $dataObject->ID;
    } else {
        $object = $dataObject;
        $id = isset($dataObject['ID']) ? $dataObject['ID'] : 0;
        $fieldsToIndex = isset($object['index_fields']) ? $object['index_fields'] : array_flip(array_keys($object));
        $sourceObject = false;
    }

    // fields that are indexed for every document
    $fieldsToIndex['SS_URL'] = true;
    $fieldsToIndex['SS_ID'] = true;
    $fieldsToIndex['LastEdited'] = true;
    $fieldsToIndex['Created'] = true;
    $fieldsToIndex['ClassName'] = true;
    $fieldsToIndex['ClassNameHierarchy'] = true;
    $fieldsToIndex['ParentsHierarchy'] = true;

    // the stage we're on when we write this doc to the index.
    // this is used for versioned AND non-versioned objects; we just cheat and
    // set it BOTH stages if it's non-versioned object
    $fieldsToIndex['SS_Stage'] = true;

    // if it's a versioned object, just save ONE stage value.
    if ($stage) {
        $object['SS_Stage'] = array('Type' => 'Enum', 'Value' => $stage);
        $id = $id . '_' . $stage;
    } else {
        $object['SS_Stage'] = array('Type' => 'Enum', 'Value' => array('Stage', 'Live'));
    }

    if (!$id) {
        return false;
    }

    // specially handle the subsite module - this has serious implications for our search
    // @TODO we want to genercise this later for other modules to hook into it!
    if (ClassInfo::exists('Subsite')) {
        $fieldsToIndex['SubsiteID'] = true;
        if (is_object($dataObject)) {
            $object['SubsiteID'] = array('Type' => 'Int', 'Value' => $dataObject->SubsiteID);
        }
    }

    $classType = isset($object['ClassName']) ? $object['ClassName']['Value'] : null;

    // we're not indexing the ID field because it conflicts with Solr's internal ID
    unset($object['ID']);

    // a special type hierarchy
    if ($classType) {
        $classes = array_values(ClassInfo::ancestry($classType));
        $object['ClassNameHierarchy'] = array('Type' => 'MultiValueField', 'Value' => $classes);
        $object['ParentsHierarchy'] = $this->getParentsHierarchyField($dataObject);
    }

    foreach ($object as $field => $valueDesc) {
        if (!$valueDesc) {
            continue;
        }

        if (!is_array($valueDesc) || !isset($valueDesc['Type'])) {
            // if we're indexing an object and there's no valueDesc, just skip this field
            if ($sourceObject) {
                continue;
            }
            $valueDesc = array('Value' => $valueDesc, 'Type' => $this->mapper->mapValueToType($field, $valueDesc));
        }

        $type = $valueDesc['Type'];
        $value = $valueDesc['Value'];

        // this should have already been taken care of, but just in case...
        if ($type == 'MultiValueField' && $value instanceof MultiValueField) {
            $value = $value->getValues();
        }

        if (!isset($fieldsToIndex[$field])) {
            continue;
        }

        $fieldName = $this->mapper->mapFieldNameFromType($field, $type, $fieldsToIndex[$field]);
        if (!$fieldName) {
            continue;
        }

        $value = $this->mapper->convertValue($value, $type);

        if (is_array($value)) {
            foreach ($value as $v) {
                $document->addField($fieldName, $v);
            }
        } else {
            $boost = false;
            if (isset($fieldBoost["{$fieldName}:{$value}"])) {
                $boost = $fieldBoost["{$fieldName}:{$value}"];
            }

            // The field is written once, together with its boost. The
            // former extra "$document->{$fieldName} = $value;" went through
            // the magic setter, which sets the field again without a boost
            // and so discarded the boost applied here.
            $document->setField($fieldName, $value, $boost);
        }
    }

    // objects with a class are keyed by class and id, raw data by RAW_DATA_KEY
    $document->id = $classType ? $classType . '_' . $id : SolrSearchService::RAW_DATA_KEY . $id;

    return $document;
}
/**
 * Builds the Solr document that represents one Omeka item.
 *
 * On top of the identifying fields, title, element texts, tags, collection,
 * item type and featured flag, the document receives owner and visibility,
 * the ids of item type and collection, start/end dates and decennium group
 * derived from the Dublin Core date, a text size and size class, and the
 * fields of the item's narration and action locations.
 *
 * @param Omeka_Record $item The record to index.
 *
 * @return Apache_Solr_Document
 * @author Eric Rochester <*****@*****.**>
 **/
public static function itemToDocument($item)
{
    $searchFields = get_db()->getTable('SolrSearchField');
    $solrDoc = new Apache_Solr_Document();

    // Identification, ownership and public/private status.
    $solrDoc->setField('id', 'Item_' . $item->id);
    $solrDoc->setField('resulttype', 'Item');
    $solrDoc->setField('model', 'Item');
    $solrDoc->setField('modelid', $item->id);
    $solrDoc->setField('owner_id', $item->owner_id);
    $solrDoc->setField('public', $item->public);

    // Dublin Core title.
    $solrDoc->setField('title', metadata($item, array('Dublin Core', 'Title')));

    // Element texts of the item itself.
    self::indexItem($searchFields, $item, $solrDoc);

    // Every tag is written to both tag fields.
    foreach ($item->getTags() as $itemTag) {
        $solrDoc->setMultiValue('tag_t', $itemTag->name);
        $solrDoc->setMultiValue('tag_s', $itemTag->name);
    }

    // Collection title, only when the item belongs to a collection.
    $collection = $item->getCollection();
    if ($collection) {
        $solrDoc->collection = metadata($collection, array('Dublin Core', 'Title'));
    }

    // Item type name, only when the item has a type.
    $itemType = $item->getItemType();
    if ($itemType) {
        $solrDoc->itemtype = $itemType->name;
    }

    $solrDoc->featured = (bool) $item->featured;

    // File metadata is intentionally not indexed here.

    ###############################################################################
    // ADDITION FOR VISUALIZATIONS, FACETS AND PRIVACY --> move to indexitem

    // VISUALIZATION: ids of item type and collection.
    $itemType = $item->getItemType();
    if ($itemType) {
        $solrDoc->setField('itemtype_id', $item->item_type_id);
    }

    $collection = $item->getCollection();
    if ($collection) {
        $solrDoc->setField('collection_id', $item->collection_id);
    }

    // Start and end date(s): "start end" or a single date used for both.
    $dcDate = metadata($item, array('Dublin Core', 'Date'));
    if (self::date_validate($dcDate)) {
        $dateParts = explode(' ', $dcDate, 2);
        $partCount = count($dateParts);

        if ($partCount == 2) {
            $solrDoc->setField('date_start', self::date_supplement($dateParts[0]));
            $solrDoc->setField('date_end', self::date_supplement($dateParts[1]));
        } elseif ($partCount == 1) {
            $solrDoc->setField('date_start', self::date_supplement($dateParts[0]));
            $solrDoc->setField('date_end', self::date_supplement($dateParts[0]));
        }

        $solrDoc->setField('decennium_group', self::date_decennium($dateParts[0]));
    }

    // FACETS: text size (number of spaces in the text) and its size class.
    $text = metadata($item, array('Item Type Metadata', 'Text'));
    if ($text) {
        $spaceCount = substr_count($text, ' ');
        foreach (array('_t', '_s') as $suffix) {
            $solrDoc->setField('94' . $suffix, $spaceCount);
            $solrDoc->setField('95' . $suffix, self::classify_length($spaceCount));
        }
    }

    // Locations: key in the lookup result => prefix of the Solr field names.
    $locations = get_db()->getTable('Location')->findLocationByItem($item, false);
    if ($locations) {
        $locationPrefixes = array(
            'narration_location' => '',
            'action_location'    => 'action_',
        );
        $coordinateProperties = array('latitude', 'longitude', 'zoom_level');
        $describedProperties = array(
            'map_type',
            'address',
            'route',
            'street_number',
            'postal_code',
            'postal_code_prefix',
            'sublocality',
            'locality',
            'natural_feature',
            'establishment',
            'point_of_interest',
            'administrative_area_level_3',
            'administrative_area_level_2',
            'administrative_area_level_1',
            'country',
            'continent',
            'planetary_body',
        );

        foreach ($locationPrefixes as $locationKey => $prefix) {
            if (!array_key_exists($locationKey, $locations)) {
                continue;
            }

            $location = $locations[$locationKey];

            foreach ($coordinateProperties as $property) {
                $solrDoc->setField($prefix . $property, $location->$property);
            }

            // All "_t" fields first, then all "_s" fields.
            foreach (array('_t', '_s') as $suffix) {
                foreach ($describedProperties as $property) {
                    $solrDoc->setField($prefix . $property . $suffix, $location->$property);
                }
            }
        }
    }
    ###############################################################################

    return $solrDoc;
}
/**
 * Builds the Solr document for a single WordPress post.
 *
 * Reads the plugin options via s4w_get_option() to decide whether the post
 * is excluded, whether comments and custom fields are indexed, and whether
 * categories are written as a taxonomy path. On multisite the document id
 * is "<domain><path><post id>"; otherwise it is the bare post id.
 *
 * @param object      $post_info Post object to index (ID, post_author,
 *                               post_title, post_content, dates, ...).
 * @param string|null $domain    Blog domain, multisite only. Falls back to
 *                               the current blog's domain when empty.
 * @param string|null $path      Blog path, multisite only. Falls back to
 *                               the current blog's path when empty.
 *
 * @return Apache_Solr_Document|null The populated document, or NULL when
 *                                   the post is excluded or $post_info is
 *                                   empty.
 */
function s4w_build_document($post_info, $domain = NULL, $path = NULL)
{
    global $blog_id;
    global $current_blog;

    $plugin_s4w_settings = s4w_get_option();
    $exclude_ids = $plugin_s4w_settings['s4w_exclude_pages'];
    $category_as_taxonomy = $plugin_s4w_settings['s4w_cat_as_taxo'];
    $index_comments = $plugin_s4w_settings['s4w_index_comments'];
    // Normalise once: the option may be unset or a scalar, and count() on a
    // non-countable value is a TypeError on PHP 8.
    $index_custom_fields = (array) $plugin_s4w_settings['s4w_index_custom_fields'];

    if (!$post_info) {
        // this will fire during blog sign up on multisite, not sure why
        _e('Post Information is NULL', 'solr4wp');

        // Nothing was built, so return before the "built document" log line,
        // which would otherwise read properties of a NULL post.
        return NULL;
    }

    # check if we need to exclude this document
    # (multisite exclusion keys are "<domain><post id>", single site uses the bare id)
    $exclude_key = is_multisite() ? $current_blog->domain . $post_info->ID : $post_info->ID;
    if (in_array($exclude_key, (array) $exclude_ids)) {
        return NULL;
    }

    $doc = new Apache_Solr_Document();
    $auth_info = get_userdata($post_info->post_author);

    # wpmu specific info
    if (is_multisite()) {
        // if we get here we expect that we've "switched" what blog we're
        // running as
        if ($domain == NULL) {
            $domain = $current_blog->domain;
        }
        if ($path == NULL) {
            $path = $current_blog->path;
        }

        $blogid = get_blog_id_from_url($domain, $path);
        $doc->setField('id', $domain . $path . $post_info->ID);
        $doc->setField('permalink', get_blog_permalink($blogid, $post_info->ID));
        $doc->setField('blogid', $blogid);
        $doc->setField('blogdomain', $domain);
        $doc->setField('blogpath', $path);
        $doc->setField('wp', 'multisite');
    } else {
        $doc->setField('id', $post_info->ID);
        $doc->setField('permalink', get_permalink($post_info->ID));
        $doc->setField('wp', 'wp');
    }

    $numcomments = 0;
    if ($index_comments) {
        $comments = get_comments("status=approve&post_id={$post_info->ID}");
        foreach ($comments as $comment) {
            $doc->addField('comments', $comment->comment_content);
            $numcomments += 1;
        }
    }

    $doc->setField('title', $post_info->post_title);
    $doc->setField('content', strip_tags($post_info->post_content));

    // rawcontent drops every character outside 0x20-0x7F except the newline
    // (0x0A) before the tags are stripped
    $doc->setField('rawcontent', strip_tags(preg_replace('/[^(\\x20-\\x7F)\\x0A]*/', '', $post_info->post_content)));

    // contentnoshortcodes applies the same character filter but also strips
    // shortcodes used in WP to add images or other content, useful if you're
    // pulling this data into another system
    //
    // For example
    //   [caption id="attachment_92495" align="alignright" width="160" caption="Duane Sand"][/caption] FARGO - Republican U.S. Senate...
    //
    // Will become
    //   FARGO - Republican U.S. Senate...
    $doc->setField('contentnoshortcodes', strip_tags(preg_replace('/[^(\\x20-\\x7F)\\x0A]*/', '', strip_tags(strip_shortcodes($post_info->post_content)))));

    $doc->setField('numcomments', $numcomments);
    $doc->setField('author', $auth_info->display_name);
    $doc->setField('author_s', get_author_posts_url($auth_info->ID, $auth_info->user_nicename));
    $doc->setField('type', $post_info->post_type);
    $doc->setField('date', s4w_format_date($post_info->post_date_gmt));
    $doc->setField('tdate', s4w_format_date($post_info->post_date_gmt));
    $doc->setField('modified', s4w_format_date($post_info->post_modified_gmt));
    $doc->setField('displaydate', $post_info->post_date);
    $doc->setField('displaymodified', $post_info->post_modified);

    $categories = get_the_category($post_info->ID);
    if (is_array($categories)) {
        foreach ($categories as $category) {
            if ($category_as_taxonomy) {
                $doc->addField('categories', get_category_parents($category->cat_ID, FALSE, '^^'));
            } else {
                $doc->addField('categories', $category->cat_name);
            }
        }
    }

    //get all the taxonomy names used by wp
    $taxonomies = (array) get_taxonomies(array('_builtin' => FALSE), 'names');
    foreach ($taxonomies as $taxonomy) {
        $terms = get_the_terms($post_info->ID, $taxonomy);
        if (is_array($terms)) {
            //we are creating *_taxonomy as dynamic fields using our schema
            //so lets set up all our taxonomies in that format
            $taxonomy_field = $taxonomy . "_taxonomy";
            foreach ($terms as $term) {
                $doc->addField($taxonomy_field, $term->name);
            }
        }
    }

    // Only iterate real tag lists; a non-array result must not reach foreach.
    $tags = get_the_tags($post_info->ID);
    if (is_array($tags)) {
        foreach ($tags as $tag) {
            $doc->addField('tags', $tag->name);
        }
    }

    if (count($index_custom_fields) > 0) {
        $custom_fields = get_post_custom($post_info->ID);
        if (is_array($custom_fields)) {
            foreach ($index_custom_fields as $field_name) {
                // A configured field that this post does not carry is skipped
                // instead of triggering an undefined-index read.
                if (!isset($custom_fields[$field_name])) {
                    continue;
                }
                foreach ((array) $custom_fields[$field_name] as $value) {
                    $doc->addField($field_name . '_str', $value);
                    $doc->addField($field_name . '_srch', $value);
                }
            }
        }
    }

    // add full json respone
    $json_response = new JSON_API_Response();
    $json_post = new JSON_API_Post_full(get_post($post_info->ID));
    $response = (object) array('post' => $json_post);
    $doc->setField('json', $json_response->get_json($response));

    // add tone: /nieuws/beste van het web/weblog/column/
    $tone = 'Nieuws';
    if ($blog_id > 1) {
        $nmt_blog_type = stripslashes(get_blog_option($blog_id, 'nmt_blog_type'));
        $nmt_blog_types = array(1 => 'Weblog', 2 => 'Column', 3 => 'Nieuws', 4 => 'Fotoserie');
        // Keep the default tone when the blog has no (or an unknown) type
        // rather than overwriting it with NULL.
        if (isset($nmt_blog_types[$nmt_blog_type])) {
            $tone = $nmt_blog_types[$nmt_blog_type];
        }
    } else {
        $category_slugs = array();
        if (is_array($categories)) {
            foreach ($categories as $category) {
                $category_slugs[] = $category->slug;
            }
        }
        if (in_array('beste-van-het-web', $category_slugs)) {
            $tone = 'Beste van het web';
        }
    }
    $doc->setField('tone', $tone);

    syslog(LOG_ERR, "built document for {$blog_id} - {$domain}{$path} with title " . $post_info->post_title . " and status of " . $post_info->post_status);

    return $doc;
}
/**
 * Assembles the Solr document for one catalog product in one store.
 *
 * Fixed schema fields are written first, followed by one dynamic field per
 * non-empty indexable attribute that is not already covered by the fixed
 * schema. For configurable products the child product attributes are
 * merged into the indexable attributes before the dynamic fields are added.
 *
 * @param mixed $storeId             Store the product is loaded for.
 * @param mixed $productId           Id of the product to load.
 * @param mixed $indexableAttributes Attributes handed to the solr/attribute
 *                                   helper's getNamedProductAttributes().
 *
 * @return Apache_Solr_Document
 */
protected function buildProductDocument($storeId, $productId, $indexableAttributes)
{
    $solrHelper = Mage::helper('solr');
    $indexableAttributes = Mage::helper('solr/attribute')->getNamedProductAttributes($indexableAttributes);

    /** @var Mage_Catalog_Model_Product $product */
    $product = Mage::getModel('catalog/product')->setStoreId($storeId)->load($productId);

    $siteHost = parse_url(Mage::getBaseUrl(Mage_Core_Model_Store::URL_TYPE_WEB), PHP_URL_HOST);

    $document = new Apache_Solr_Document();

    // Identity and bookkeeping fields.
    $identityFields = array(
        'appKey'    => 'Asm_Solr',
        'type'      => 'catalog/product',
        'id'        => $solrHelper->getProductDocumentId($product->getEntityId()),
        'site'      => $siteHost,
        'siteHash'  => $solrHelper->getSiteHashForDomain($siteHost),
        'storeId'   => $storeId,
        'created'   => $solrHelper->dateToIso($product->getCreatedAt()),
        'changed'   => $solrHelper->dateToIso($product->getUpdatedAt()),
        'sku'       => $product->getSku(),
        'productId' => $product->getEntityId(),
    );
    foreach ($identityFields as $fieldName => $fieldValue) {
        $document->setField($fieldName, $fieldValue);
    }

    // categoryId is multi-valued: one entry per category of the product.
    foreach ($product->getCategoryIds() as $categoryId) {
        $document->addField('categoryId', $categoryId);
    }

    // Availability, visibility and descriptive fields.
    $catalogFields = array(
        'isSalable'          => $product->isSalable(),
        'inStock'            => $product->isInStock(),
        'isVisible'          => $product->getStatus(),
        'isVisibleInCatalog' => $product->isVisibleInCatalog(),
        'title'              => $product->getName(),
        'content'            => $product->getDescription(),
        'keywords'           => $solrHelper->trimExplode(',', $product->getMetaKeyword(), true),
        'url'                => $product->getProductUrl(),
        'price'              => $product->getPrice(),
    );
    foreach ($catalogFields as $fieldName => $fieldValue) {
        $document->setField($fieldName, $fieldValue);
    }

    if ($product->getManufacturer()) {
        $document->setField('manufacturer', $product->getAttributeText('manufacturer'));
    }

    $document->setField('image_stringS', $product->getImage());
    $document->setField('small_image_stringS', $product->getSmallImage());
    $document->setField('thumbnail_stringS', $product->getThumbnail());

    $typeId = $product->getTypeId();
    $document->setField('type_id_stringS', $typeId);

    if ($typeId == 'configurable') {
        $indexableAttributes = array_merge(
            $indexableAttributes,
            $this->getConfigurableProductChildProductAttributes($storeId, $product)
        );
    }

    $processorFactory = Mage::getResourceModel('solr/indexer_fieldprocessor_factory');

    // add other searchable attributes as dynamic fields; empty values and
    // fixed schema fields are left out so the latter are not added twice
    foreach ($indexableAttributes as $code => $value) {
        if (!empty($value) && !in_array($code, $this->fixedSchemaFieldAttributes)) {
            $processor = $processorFactory->getFieldProcessor($code, $value);
            $document->setField($processor->getFieldName(), $processor->getFieldValue());
        }
    }

    return $document;
}
/**
 * Adds an object from the database to the index as the given base class.
 *
 * @param DataObject $object  Object to index.
 * @param string     $base    Base class; only fields whose 'base' equals it
 *                            are written.
 * @param array      $options Must provide 'include_children', which is
 *                            passed on to getDocumentID().
 *
 * @return Apache_Solr_Document|false The submitted document, or false when
 *                                    the service threw while adding it (the
 *                                    exception is logged as a warning).
 */
protected function _addAs($object, $base, $options)
{
    $document = new Apache_Solr_Document();

    // Always present fields
    $document->setField(
        '_documentid',
        $this->getDocumentID($object, $base, $options['include_children'])
    );
    $document->setField('ID', $object->ID);
    $document->setField('ClassName', $object->ClassName);

    $ancestry = SearchIntrospection::hierarchy(get_class($object), false);
    foreach ($ancestry as $ancestorClass) {
        $document->addField('ClassHierarchy', $ancestorClass);
    }

    // Add the user-specified fields that belong to this base class
    foreach ($this->getFieldsIterator() as $fieldSpec) {
        if ($fieldSpec['base'] != $base) {
            continue;
        }
        $this->_addField($document, $object, $fieldSpec);
    }

    try {
        $this->getService()->addDocument($document);
    } catch (Exception $exception) {
        SS_Log::log($exception, SS_Log::WARN);

        return false;
    }

    return $document;
}
/**
 * Rewrites document fields in place according to a processing configuration.
 *
 * Each configuration entry maps a field name to one instruction. Fields
 * that do not exist in the document are left alone. Single value fields
 * are wrapped into an array for processing and unwrapped again afterwards.
 *
 * Known instructions: timestampToIsoDate, pathToHierarchy,
 * pageUidToHierarchy, categoryUidToHierarchy and uppercase. Any other
 * instruction writes the field back unchanged.
 *
 * @param Apache_Solr_Document $document
 * @param array $processingConfiguration Field name => instruction
 */
public function processDocument(Apache_Solr_Document $document, array $processingConfiguration)
{
    foreach ($processingConfiguration as $fieldName => $instruction) {
        $field = $document->getField($fieldName);
        if ($field === FALSE) {
            // field is not part of this document
            continue;
        }

        $values = $field['value'];
        $wasSingleValue = !is_array($values);
        if ($wasSingleValue) {
            // turn single value field into multi value field
            $values = array($values);
        }

        // Pick the processor class for the instruction; 'uppercase' needs
        // no processor object and is applied right away.
        $processorClass = NULL;
        switch ($instruction) {
            case 'timestampToIsoDate':
                $processorClass = 'Tx_Solr_FieldProcessor_TimestampToIsoDate';
                break;
            case 'pathToHierarchy':
                $processorClass = 'Tx_Solr_FieldProcessor_PathToHierarchy';
                break;
            case 'pageUidToHierarchy':
                $processorClass = 'Tx_Solr_FieldProcessor_PageUidToHierarchy';
                break;
            case 'categoryUidToHierarchy':
                $processorClass = 'Tx_Solr_FieldProcessor_CategoryUidToHierarchy';
                break;
            case 'uppercase':
                $values = array_map('strtoupper', $values);
                break;
        }

        if ($processorClass !== NULL) {
            $processor = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance($processorClass);
            $values = $processor->process($values);
        }

        // turn multi value field back into single value field
        $document->setField($fieldName, $wasSingleValue ? $values[0] : $values);
    }
}