/**
 * Copies record data into the Solr document according to the indexing
 * configuration.
 *
 * Configuration entries whose value is an array are content object
 * configurations and are skipped. Array results are appended value by value
 * (multi value field); scalar results are set only when they are neither an
 * empty string nor null.
 *
 * @param \Apache_Solr_Document $document base document to add fields to
 * @param array $indexingConfiguration Indexing configuration / mapping
 * @param array $data Record data
 * @return \Apache_Solr_Document Modified document with added fields
 * @throws InvalidFieldNameException when a configured field may not be overridden
 */
protected function addDocumentFieldsFromTyposcript(\Apache_Solr_Document $document, array $indexingConfiguration, array $data)
{
    foreach ($indexingConfiguration as $solrFieldName => $recordFieldName) {
        // configuration for a content object, nothing to map here
        if (is_array($recordFieldName)) {
            continue;
        }

        if (!self::isAllowedToOverrideField($solrFieldName)) {
            throw new InvalidFieldNameException('Must not overwrite field .' . $solrFieldName, 1435441863);
        }

        $resolved = $this->resolveFieldValue($indexingConfiguration, $solrFieldName, $data);

        if (is_array($resolved)) {
            // multi value: append every entry
            foreach ($resolved as $singleValue) {
                $document->addField($solrFieldName, $singleValue);
            }
            continue;
        }

        // empty results must not overwrite an existing field value
        if ($resolved === '' || $resolved === null) {
            continue;
        }

        $document->setField($solrFieldName, $resolved);
    }

    return $document;
}
/**
 * Creates a small fixture document carrying an id and a title derived
 * from that id ("Item <id>").
 *
 * @param mixed $id value stored in the "id" field
 * @return Apache_Solr_Document
 */
protected function getMockDocument($id)
{
    $mock = new Apache_Solr_Document();

    $mock->setField('id', $id);
    $mock->setField('title', 'Item ' . $id);

    return $mock;
}
/**
 * Builds a Solr document from an associative array and sends it to the
 * Solr service.
 *
 * Array values are added as multi value fields, everything else is assigned
 * as a single value. Values are passed on unchanged: the previous
 * number_format() calls discarded their result and therefore never had any
 * effect, so they have been removed.
 *
 * @param array $data field name => value (or list of values)
 * @return mixed the service response, or the exception message (string)
 *               when the service throws
 */
public function addDocument($data)
{
    $document = new Apache_Solr_Document();

    foreach ($data as $key => $value) {
        if (is_array($value)) {
            foreach ($value as $datum) {
                $document->setMultiValue($key, $datum);
            }
        } else {
            $document->{$key} = $value;
        }
    }

    try {
        // no explicit commit here: committing is left to the caller / Solr
        return $this->_solrService->addDocument($document);
    } catch (Exception $e) {
        return $e->getMessage();
    }
}
/**
 * Queues one or more documents in $this->documents.
 *
 * Accepts either a ready-made Apache_Solr_Document or an array whose
 * entries are Apache_Solr_Document instances or "field name => value"
 * arrays (array values become multi value fields). Any other argument
 * triggers a user-level notice and queues nothing.
 *
 * @param Apache_Solr_Document|array $parts
 *
 * @return SP_Controller_Action_Helper_Solr
 */
public function pushDocuments($parts)
{
    $this->_setSolrService();

    if ($parts instanceof Apache_Solr_Document) {
        $this->documents[] = $parts;
        return $this;
    }

    if (!is_array($parts)) {
        // message fixed: correct spelling and the actual parameter name
        trigger_error('the parameter $parts must be an object of Apache_Solr_Document or an array');
        return $this;
    }

    foreach ($parts as $fields) {
        if ($fields instanceof Apache_Solr_Document) {
            $this->documents[] = $fields;
            continue;
        }

        $part = new Apache_Solr_Document();
        foreach ($fields as $key => $value) {
            if (is_array($value)) {
                foreach ($value as $datum) {
                    $part->setMultiValue($key, $datum);
                }
            } else {
                $part->setField($key, $value);
            }
        }
        $this->documents[] = $part;
    }

    return $this;
}
/**
 * Converts a list of "field name => value" arrays into Solr documents and
 * sends them to the Solr service in one call.
 *
 * @param array $datas list of field arrays; array values become multi
 *                     value fields
 * @return mixed the service response, or the exception message (string)
 *               when the service throws
 */
public function addDocuments($datas)
{
    $batch = array();

    foreach ($datas as $fields) {
        $solrDocument = new Apache_Solr_Document();

        foreach ($fields as $name => $content) {
            if (!is_array($content)) {
                $solrDocument->{$name} = $content;
                continue;
            }

            foreach ($content as $entry) {
                $solrDocument->setMultiValue($name, $entry);
            }
        }

        $batch[] = $solrDocument;
    }

    try {
        // no explicit commit is issued here
        return $this->_solrService->addDocuments($batch);
    } catch (Exception $exception) {
        return $exception->getMessage();
    }
}
/**
 * Applies the configured processing instruction to each listed field of
 * the document.
 *
 * Fields missing from the document are skipped. Single values are wrapped
 * into an array for processing and unwrapped again before being written
 * back; unknown instructions leave the value untouched.
 *
 * @param Apache_Solr_Document $document
 * @param array $processingConfiguration field name => instruction
 */
public function processDocument(Apache_Solr_Document $document, array $processingConfiguration)
{
    foreach ($processingConfiguration as $fieldName => $instruction) {
        $field = $document->getField($fieldName);
        if ($field === FALSE) {
            continue;
        }

        $values = $field['value'];

        // processors work on lists, so wrap single values temporarily
        $wasSingleValue = !is_array($values);
        if ($wasSingleValue) {
            $values = array($values);
        }

        switch ($instruction) {
            case 'timestampToIsoDate':
                $values = t3lib_div::makeInstance('tx_solr_fieldprocessor_TimestampToIsoDate')->process($values);
                break;
            case 'uppercase':
                $values = array_map('strtoupper', $values);
                break;
        }

        $document->setField($fieldName, $wasSingleValue ? $values[0] : $values);
    }
}
/**
 * Builds a Solr document for an entry from the field definitions in
 * self::$solrFields.
 *
 * Each definition supplies 'type', 'phpName' (the getter suffix) and
 * 'solrName'. "date" fields call the getter with an ISO-8601 style format
 * string; "array" fields are comma separated strings that are split and
 * added value by value.
 *
 * @param entry $entry
 * @return Apache_Solr_Document
 */
public function createEntryDocument(entry $entry)
{
    $document = new Apache_Solr_Document();
    $document->plugins_data = '';

    foreach (self::$solrFields as $solrField) {
        $fieldType = $solrField['type'];
        $getter = "get" . $solrField['phpName'];

        if ($fieldType == "date") {
            $value = call_user_func(array($entry, $getter), "%Y-%m-%dT%H:%M:%SZ");
        } else {
            $value = call_user_func(array($entry, $getter));
        }

        $solrName = $solrField['solrName'];

        switch ($fieldType) {
            case "array":
                if ($value != '') {
                    // the original looped over the undefined $vals, so no
                    // value of an "array" field ever reached the document
                    foreach (explode(",", $value) as $item) {
                        $document->addField($solrName, $item);
                    }
                }
                break;
            default:
                $document->addField($solrName, $value);
        }
    }

    return $document;
}
/**
 * Turns a "field name => value" array into a Solr document and hands it
 * to the service. Array values are added entry by entry.
 *
 * @param array $doc
 */
public function addDocument($doc)
{
    $document = new Apache_Solr_Document();

    foreach ($doc as $name => $content) {
        // treat scalars as a one-element list so both cases share one loop
        $entries = is_array($content) ? $content : array($content);

        foreach ($entries as $entry) {
            $document->addField($name, $entry);
        }
    }

    $this->service->addDocument($document);
}
/**
 * Builds a populated Apache_Solr_Document for an Omeka record.
 *
 * Sets the identifying fields, the public flag and the Dublin Core title,
 * then indexes the item's elements, tags, collection title, item type,
 * featured flag and the elements of every attached file.
 *
 * @param Omeka_Record $item The record to index.
 *
 * @return Apache_Solr_Document
 * @author Eric Rochester <*****@*****.**>
 **/
public static function itemToDocument($item)
{
    $fields = get_db()->getTable('SolrSearchField');
    $doc = new Apache_Solr_Document();

    // identifying fields plus the public / private status
    $coreFields = array(
        'id' => "Item_{$item->id}",
        'resulttype' => 'Item',
        'model' => 'Item',
        'modelid' => $item->id,
        'public' => $item->public,
    );
    foreach ($coreFields as $name => $content) {
        $doc->setField($name, $content);
    }

    // Title
    $doc->setField('title', metadata($item, array('Dublin Core', 'Title')));

    // Elements
    self::indexItem($fields, $item, $doc);

    // Tags
    foreach ($item->getTags() as $tag) {
        $doc->setMultiValue('tag', $tag->name);
    }

    // Collection
    $collection = $item->getCollection();
    if ($collection) {
        $doc->collection = metadata($collection, array('Dublin Core', 'Title'));
    }

    // Item type
    $itemType = $item->getItemType();
    if ($itemType) {
        $doc->itemtype = $itemType->name;
    }

    $doc->featured = (bool) $item->featured;

    // File metadata
    foreach ($item->getFiles() as $file) {
        self::indexItem($fields, $file, $doc);
    }

    return $doc;
}
/**
 * Builds an Apache_Solr_Document to pass to Apache_Solr_Service.
 *
 * Every entry of $document is an array with a 'value' key and an optional
 * 'boost' key. Values are always handed over as arrays; a boost is passed
 * only when one is set.
 *
 * @param array $document field name => array('value' => ..., 'boost' => ...)
 * @return Apache_Solr_Document
 **/
public function buildDocument(array $document)
{
    $solrDocument = new Apache_Solr_Document();

    foreach ($document as $fieldName => $field) {
        $rawValue = $field['value'];
        $values = is_array($rawValue) ? $rawValue : array($rawValue);

        if (!isset($field['boost'])) {
            $solrDocument->setField($fieldName, $values);
            continue;
        }

        $solrDocument->setField($fieldName, $values, $field['boost']);
    }

    return $solrDocument;
}
/**
 * (Re)indexes a single dish ("plato") in Solr.
 *
 * Loads the dish together with its comment count, dish type, local,
 * restaurant, food type and location, plus its tags, and pushes the result
 * to Solr as one document. Unless $caso is exactly 1, any existing Solr
 * document with the same id is deleted first. Nothing happens when the Solr
 * server does not answer the ping.
 *
 * If the dish query returns no row, no document is added (the original
 * dereferenced $plato[0] unconditionally and indexed a document that
 * carried nothing but the id); a pending delete is still committed.
 *
 * @param mixed $id   primary key of the dish (ta_plato.in_id)
 * @param mixed $caso pass integer 1 to skip deleting the existing document
 * @return void
 */
public function cromSolar($id, $caso = null)
{
    $adapter = $this->tableGateway->getAdapter();
    $sql = new Sql($adapter);

    // dish with all joined descriptive data, one row per dish
    $selecttot = $sql->select()
        ->from('ta_plato')
        ->join(array('c' => 'ta_comentario'), 'c.ta_plato_in_id=ta_plato.in_id', array('cantidad' => new \Zend\Db\Sql\Expression('COUNT(c.in_id)')), 'left')
        ->join('ta_tipo_plato', 'ta_plato.ta_tipo_plato_in_id=ta_tipo_plato.in_id ', array('tipo_plato_nombre' => 'va_nombre'), 'left')
        ->join(array('pl' => 'ta_plato_has_ta_local'), 'pl.Ta_plato_in_id = ta_plato.in_id', array(), 'left')
        ->join(array('tl' => 'ta_local'), 'tl.in_id = pl.Ta_local_in_id', array('latitud' => 'de_latitud', 'longitud' => 'de_longitud', 'direccion' => 'va_direccion', 'telefono' => 'va_telefono'), 'left')
        ->join(array('tr' => 'ta_restaurante'), 'tr.in_id = tl.ta_restaurante_in_id', array('restaurant_nombre' => 'va_nombre', 'restaurant_estado' => 'en_estado'), 'left')
        ->join(array('tc' => 'ta_tipo_comida'), 'tc.in_id = tr.Ta_tipo_comida_in_id', array('nombre_tipo_comida' => 'va_nombre_tipo'), 'left')
        ->join(array('tu' => 'ta_ubigeo'), 'tu.in_id = tl.ta_ubigeo_in_id', array('distrito' => 'ch_distrito', 'departamento' => 'ch_departamento'), 'left')
        ->where(array('ta_plato.in_id' => $id));
    $selecttot->group('ta_plato.in_id');
    $selectString = $sql->getSqlStringForSqlObject($selecttot);
    $results = $adapter->query($selectString, $adapter::QUERY_MODE_EXECUTE);
    $plato = $results->toArray();

    // tags of the dish, one row per tag
    $selectto = $sql->select()
        ->from('ta_plato')
        ->join(array('tpt' => 'ta_plato_has_ta_tag'), 'tpt.Ta_plato_in_id = ta_plato.in_id', array('tag_id' => 'ta_tag_in_id'), 'left')
        ->join(array('tt' => 'ta_tag'), 'tt.in_id =tpt.ta_tag_in_id', array('tag' => 'va_nombre'), 'left')
        ->where(array('ta_plato.in_id' => $id));
    $selectStrin = $sql->getSqlStringForSqlObject($selectto);
    $result = $adapter->query($selectStrin, $adapter::QUERY_MODE_EXECUTE);
    $tag = $result->toArray();

    $solr = \Classes\Solr::getInstance()->getSolr();
    if (!$solr->ping()) {
        return;
    }

    if ($caso !== 1) {
        $solr->deleteByQuery('id:' . $id);
    }

    // unknown dish: do not index an empty document, just flush the delete
    if (!isset($plato[0])) {
        $solr->commit();
        return;
    }

    $row = $plato[0];

    $document = new \Apache_Solr_Document();
    $document->id = $id;
    $document->name = $row['va_nombre'];
    $document->tx_descripcion = $row['tx_descripcion'];
    $document->va_precio = $row['va_precio'];
    $document->en_estado = $row['en_estado'];
    $document->plato_tipo = $row['tipo_plato_nombre'];
    $document->va_direccion = $row['direccion'];
    $document->restaurante = $row['restaurant_nombre'];
    $document->tipo_comida = $row['nombre_tipo_comida'];
    $document->en_destaque = $row['en_destaque'];
    $document->va_telefono = $row['telefono'];
    $document->latitud = $row['latitud'];
    $document->longitud = $row['longitud'];
    $document->departamento = $row['departamento'];
    foreach ($tag as $resultado) {
        $document->setMultiValue('tag', $resultado['tag']);
    }
    $document->distrito = $row['distrito'];
    $document->va_imagen = $row['va_imagen'];
    $document->comentarios = $row['cantidad'];
    $document->restaurant_estado = $row['restaurant_estado'];
    $document->puntuacion = $row['Ta_puntaje_in_id'];

    $solr->addDocument($document);
    $solr->commit();
}
/**
 * Applies the configured processing instruction to each listed field of
 * the document.
 *
 * Fields missing from the document are skipped. Single values are wrapped
 * into an array for processing and unwrapped again before being written
 * back; unknown instructions leave the value untouched.
 *
 * @param Apache_Solr_Document $document
 * @param array $processingConfiguration field name => instruction
 */
public function processDocument(Apache_Solr_Document $document, array $processingConfiguration)
{
    foreach ($processingConfiguration as $fieldName => $instruction) {
        $field = $document->getField($fieldName);
        if ($field === FALSE) {
            continue;
        }

        $values = $field['value'];

        // processors work on lists, so wrap single values temporarily
        $wasSingleValue = !is_array($values);
        if ($wasSingleValue) {
            $values = array($values);
        }

        // instructions backed by a field processor only pick the class here
        $processorClass = NULL;
        switch ($instruction) {
            case 'timestampToUtcIsoDate':
                $processorClass = 'Tx_Solr_FieldProcessor_TimestampToUtcIsoDate';
                break;
            case 'timestampToIsoDate':
                $processorClass = 'Tx_Solr_FieldProcessor_TimestampToIsoDate';
                break;
            case 'pathToHierarchy':
                $processorClass = 'Tx_Solr_FieldProcessor_PathToHierarchy';
                break;
            case 'pageUidToHierarchy':
                $processorClass = 'Tx_Solr_FieldProcessor_PageUidToHierarchy';
                break;
            case 'categoryUidToHierarchy':
                $processorClass = 'Tx_Solr_FieldProcessor_CategoryUidToHierarchy';
                break;
            case 'uppercase':
                $values = array_map('strtoupper', $values);
                break;
        }

        if ($processorClass !== NULL) {
            $processor = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance($processorClass);
            $values = $processor->process($values);
        }

        $document->setField($fieldName, $wasSingleValue ? $values[0] : $values);
    }
}
/**
 * Applies the configured processing instruction to each listed field of
 * the document.
 *
 * Fields missing from the document are skipped. Single values are wrapped
 * into an array for processing and unwrapped again before being written
 * back; unknown instructions leave the value untouched.
 *
 * @param \Apache_Solr_Document $document
 * @param array $processingConfiguration field name => instruction
 */
public function processDocument(\Apache_Solr_Document $document, array $processingConfiguration)
{
    foreach ($processingConfiguration as $fieldName => $instruction) {
        $field = $document->getField($fieldName);
        if ($field === false) {
            continue;
        }

        $values = $field['value'];

        // processors work on lists, so wrap single values temporarily
        $wasSingleValue = !is_array($values);
        if ($wasSingleValue) {
            $values = array($values);
        }

        // instructions backed by a field processor only pick the class here
        $processorClass = null;
        switch ($instruction) {
            case 'timestampToUtcIsoDate':
                $processorClass = 'ApacheSolrForTypo3\\Solr\\FieldProcessor\\TimestampToUtcIsoDate';
                break;
            case 'timestampToIsoDate':
                $processorClass = 'ApacheSolrForTypo3\\Solr\\FieldProcessor\\TimestampToIsoDate';
                break;
            case 'pathToHierarchy':
                $processorClass = 'ApacheSolrForTypo3\\Solr\\FieldProcessor\\PathToHierarchy';
                break;
            case 'pageUidToHierarchy':
                $processorClass = 'ApacheSolrForTypo3\\Solr\\FieldProcessor\\PageUidToHierarchy';
                break;
            case 'categoryUidToHierarchy':
                $processorClass = 'ApacheSolrForTypo3\\Solr\\FieldProcessor\\CategoryUidToHierarchy';
                break;
            case 'uppercase':
                $values = array_map('strtoupper', $values);
                break;
        }

        if ($processorClass !== null) {
            $processor = GeneralUtility::makeInstance($processorClass);
            $values = $processor->process($values);
        }

        $document->setField($fieldName, $wasSingleValue ? $values[0] : $values);
    }
}
/**
 * Forwards the call to the parent implementation and optionally
 * suppresses runtime exceptions.
 *
 * A \RuntimeException raised by the parent is rethrown only when
 * $this->throwExceptions is truthy; otherwise it is swallowed and null is
 * returned.
 *
 * @param string $name
 * @param array $arguments
 * @return mixed|null result of the parent call, null when an exception was suppressed
 * @throws \Exception
 * @throws \RuntimeException
 */
public function __call($name, $arguments)
{
    try {
        return parent::__call($name, $arguments);
    } catch (\RuntimeException $exception) {
        if (!$this->throwExceptions) {
            return null;
        }

        throw $exception;
    }
}
/**
 * Verifies that the "timestampToIsoDate" instruction converts every value
 * of a multi valued field.
 *
 * @test
 */
public function transformsUnixTimestampToIsoDateOnMultiValuedField()
{
    // two timestamps one second apart
    $this->documentMock->addField('dateField', '1262343600'); // expected as 2010-01-01T12:00:00Z
    $this->documentMock->addField('dateField', '1262343601'); // expected as 2010-01-01T12:00:01Z

    $configuration = array('dateField' => 'timestampToIsoDate');
    $this->service->processDocument($this->documentMock, $configuration);

    $value = $this->documentMock->getField('dateField');

    // expected value first, actual second, so failure output is labelled correctly
    $this->assertEquals(
        array('2010-01-01T12:00:00Z', '2010-01-01T12:00:01Z'),
        $value['value'],
        'field was not processed with timestampToIsoDate'
    );
}
/**
 * Copies record data into the Solr document according to the indexing
 * configuration.
 *
 * Configuration entries whose value is an array are content object
 * configurations and are skipped. Array results are appended value by
 * value (multi value field); any other result is set as a single value.
 *
 * @param Apache_Solr_Document $document base document to add fields to
 * @param array $indexingConfiguration Indexing configuration / mapping
 * @param array $data Record data
 * @return Apache_Solr_Document Modified document with added fields
 */
protected function addDocumentFieldsFromTyposcript(Apache_Solr_Document $document, array $indexingConfiguration, array $data)
{
    foreach ($indexingConfiguration as $solrFieldName => $recordFieldName) {
        // configuration for a content object, nothing to map here
        if (is_array($recordFieldName)) {
            continue;
        }

        $resolved = $this->resolveFieldValue($indexingConfiguration, $solrFieldName, $data);

        if (!is_array($resolved)) {
            $document->setField($solrFieldName, $resolved);
            continue;
        }

        // multi value: append every entry
        foreach ($resolved as $singleValue) {
            $document->addField($solrFieldName, $singleValue);
        }
    }

    return $document;
}
/**
 * Adds an object from the database to the index as a single document.
 *
 * The document always carries "_documentid", "ID", "ClassName" and one
 * "ClassHierarchy" value per class returned by the introspection helper.
 * It then receives every configured field whose base matches $base and
 * the two custom combined fields "Title" and "LastEdited".
 *
 * @param DataObject $object object to index
 * @param string $base base class the configured fields are filtered by
 * @param array $options must contain the key 'include_children'
 * @return Apache_Solr_Document the document that was sent to the service
 */
protected function _addAs($object, $base, $options)
{
    $includeSubs = $options['include_children'];

    $solrDocument = new Apache_Solr_Document();

    // Always present fields
    $solrDocument->setField('_documentid', $this->getDocumentID($object, $base, $includeSubs));
    $solrDocument->setField('ID', $object->ID);
    $solrDocument->setField('ClassName', $object->ClassName);

    $hierarchy = SearchIntrospection::hierarchy(get_class($object), false);
    foreach ($hierarchy as $ancestor) {
        $solrDocument->addField('ClassHierarchy', $ancestor);
    }

    // Add the user-specified fields
    foreach ($this->getFieldsIterator() as $name => $field) {
        if ($field['base'] != $base) {
            continue;
        }
        $this->_addField($solrDocument, $object, $field);
    }

    // CUSTOM Duplicate index combined fields ("Title" rather than
    // "SiteTree_Title").
    //
    // This allows us to sort on these fields without deeper architectural
    // changes to the fulltextsearch module. Note: We can't use <copyField>
    // for this purpose because it only writes into multiValue=true
    // fields, and those can't be (reliably) sorted on.
    $titleField = $this->getCustomPropertyFieldData('Title', $object);
    $this->_addField($solrDocument, $object, $titleField);

    $lastEditedField = $this->getCustomPropertyFieldData('LastEdited', $object, 'SSDatetime');
    $this->_addField($solrDocument, $object, $lastEditedField);

    $this->getService()->addDocument($solrDocument);

    return $solrDocument;
}
/**
 * Build a Solr document for a specific file.
 *
 * The site host is taken from the web base URL. "productId" is always 0
 * and "sku" repeats the document type.
 *
 * @param integer $storeId Store ID the file belongs to/where it is linked on a page
 * @param Asm_Solr_Model_Indexqueue_File $file The file to index
 * @return Apache_Solr_Document
 */
protected function buildFileDocument($storeId, Asm_Solr_Model_Indexqueue_File $file)
{
    $helper = Mage::helper('solr');
    $host = parse_url(Mage::getBaseUrl(Mage_Core_Model_Store::URL_TYPE_WEB), PHP_URL_HOST);

    // field name => value, written to the document in this order
    $fieldValues = array(
        'appKey' => 'Asm_Solr',
        'type' => 'solr/indexqueue_file',
        'id' => $helper->getFileDocumentId($file->getId()),
        'site' => $host,
        'siteHash' => $helper->getSiteHashForDomain($host),
        'storeId' => $storeId,
        'changed' => $helper->dateToIso($file->getFileLastChangedTime()),
        'productId' => 0,
        'sku' => 'solr/indexqueue_file',
        'title' => $file->getName(),
        'content' => $file->getContent(),
        'url' => $file->getUrl(),
    );

    $document = new Apache_Solr_Document();
    foreach ($fieldValues as $fieldName => $fieldValue) {
        $document->setField($fieldName, $fieldValue);
    }

    return $document;
}
/**
 * clear() must drop the document boost and every field, leaving the
 * document empty when iterated.
 */
public function testClearReturnsDocumentToDefaultState()
{
    $document = $this->_fixture;

    // give the document a boost and one field, then wipe it
    $document->setBoost(0.5);
    $document->someField = "some value";
    $document->clear();

    // boost is reset to false
    $this->assertFalse($document->getBoost());

    // no names, values or boosts are left
    $this->assertEquals(0, count($document->getFieldNames()));
    $this->assertEquals(0, count($document->getFieldValues()));
    $this->assertEquals(0, count($document->getFieldBoosts()));

    // iterating the document yields nothing
    $this->assertEquals(0, iterator_count($document));
}
/**
 * Create an XML fragment from a {@link Apache_Solr_Document} instance appropriate for use inside a Solr add call
 *
 * Emits one <field> element per value. A field boost is written only on
 * the first element of a field, a document boost on the <doc> element.
 * Values that are not valid UTF-8 are passed through utf8_encode() before
 * being escaped. Fields whose escaped name is falsy are skipped.
 *
 * The field boost is looked up with the raw field name. Looking it up
 * with the already escaped name, as before, lost the boost for any name
 * containing characters that htmlspecialchars() rewrites.
 *
 * @param Apache_Solr_Document $document
 * @return string
 */
protected function _documentToXmlFragment(Apache_Solr_Document $document)
{
    $xml = '<doc';

    if ($document->getBoost() !== false) {
        $xml .= ' boost="' . $document->getBoost() . '"';
    }

    $xml .= '>';

    foreach ($document as $key => $value) {
        // boost lookup needs the unescaped name
        $fieldBoost = $document->getFieldBoost($key);
        $key = htmlspecialchars($key, ENT_QUOTES, 'UTF-8');

        if (!$key) {
            continue;
        }

        // handle single values as a one-element list
        $fieldValues = is_array($value) ? $value : array($value);

        foreach ($fieldValues as $fieldValue) {
            $xml .= '<field name="' . $key . '"';

            if ($fieldBoost !== false) {
                $xml .= ' boost="' . $fieldBoost . '"';

                // only set the boost for the first field in the set
                $fieldBoost = false;
            }

            if (!mb_check_encoding($fieldValue, 'UTF-8')) {
                $fieldValue = utf8_encode($fieldValue);
            }

            $xml .= '>' . htmlspecialchars($fieldValue, ENT_NOQUOTES, 'UTF-8') . '</field>';
        }
    }

    $xml .= '</doc>';

    return $xml;
}
{ $authors[] = $a->forename . ' ' . $a->surname; } $item['authors'] = $authors; $item['citation'] = reference_authors_to_text_string($reference) . ' ' . $reference->year . ' ' . $reference->title . ' ' . reference_to_citation_text_string($reference); */ print_r($item); $parts[] = $item; } print_r($parts); $documents = array(); foreach ($parts as $item => $fields) { $part = new Apache_Solr_Document(); foreach ($fields as $key => $value) { if (is_array($value)) { foreach ($value as $datum) { $part->setMultiValue($key, $datum); } } else { $part->{$key} = $value; } } $documents[] = $part; } // // // Load the documents into the index //
/**
 * Create an XML fragment from a {@link Apache_Solr_Document} instance appropriate for use inside a Solr add call
 *
 * Emits one <field> element per value. A field boost is written only on
 * the first element of a field, a document boost on the <doc> element.
 * The finished fragment is passed through _stripCtrlChars().
 *
 * The field boost is looked up with the raw field name. Looking it up
 * with the already escaped name, as before, lost the boost for any name
 * containing characters that htmlspecialchars() rewrites.
 *
 * @param Apache_Solr_Document $document
 * @return string
 */
protected function _documentToXmlFragment(Apache_Solr_Document $document)
{
    $xml = '<doc';

    if ($document->getBoost() !== false) {
        $xml .= ' boost="' . $document->getBoost() . '"';
    }

    $xml .= '>';

    foreach ($document as $key => $value) {
        // boost lookup needs the unescaped name
        $fieldBoost = $document->getFieldBoost($key);
        $key = htmlspecialchars($key, ENT_QUOTES, 'UTF-8');

        // handle single values as a one-element list
        $fieldValues = is_array($value) ? $value : array($value);

        foreach ($fieldValues as $fieldValue) {
            $xml .= '<field name="' . $key . '"';

            if ($fieldBoost !== false) {
                $xml .= ' boost="' . $fieldBoost . '"';

                // only set the boost for the first field in the set
                $fieldBoost = false;
            }

            $xml .= '>' . htmlspecialchars($fieldValue, ENT_NOQUOTES, 'UTF-8') . '</field>';
        }
    }

    $xml .= '</doc>';

    // replace any control characters to avoid Solr XML parser exception
    return $this->_stripCtrlChars($xml);
}
/**
 * Build a Solr document for a given page.
 *
 * The site host is taken from the web base URL. "productId" is always 0,
 * "sku" repeats the document type and the meta keywords are split on
 * commas via the helper.
 *
 * @param integer $storeId Store ID
 * @param Mage_Cms_Model_Page $page Page instance
 * @return Apache_Solr_Document
 */
protected function buildPageDocument($storeId, $page)
{
    $helper = Mage::helper('solr');
    $host = parse_url(Mage::getBaseUrl(Mage_Core_Model_Store::URL_TYPE_WEB), PHP_URL_HOST);

    // field name => value, written to the document in this order
    $fieldValues = array(
        'appKey' => 'Asm_Solr',
        'type' => 'cms/page',
        'id' => $helper->getPageDocumentId($page->getId()),
        'site' => $host,
        'siteHash' => $helper->getSiteHashForDomain($host),
        'storeId' => $storeId,
        'created' => $helper->dateToIso($page->getCreationTime()),
        'changed' => $helper->dateToIso($page->getUpdateTime()),
        'sku' => 'cms/page',
        'productId' => 0,
        'pageId' => $page->getId(),
        'title' => $page->getTitle(),
        'content' => Mage::helper('solr/contentExtractor')->getIndexableContent($page->getContent()),
        'keywords' => $helper->trimExplode(',', $page->getMetaKeywords(), true),
        'url' => Mage::helper('cms/page')->getPageUrl($page->getId()),
    );

    $document = new Apache_Solr_Document();
    foreach ($fieldValues as $fieldName => $fieldValue) {
        $document->setField($fieldName, $fieldValue);
    }

    return $document;
}
/**
 * Processes a physical unit for the Solr index
 *
 * Units without a file in the configured fulltext file group are ignored
 * (reported as success). Otherwise the fulltext XML is loaded with entity
 * loading disabled, a Solr document is built for the unit and sent to the
 * Solr service.
 *
 * @access protected
 *
 * @param tx_dlf_document &$doc: The METS document
 * @param integer $page: The page number
 * @param array $physicalUnit: Array of the physical unit to process
 *
 * @return integer 0 on success or 1 on failure
 */
protected static function processPhysical(tx_dlf_document &$doc, $page, array $physicalUnit)
{
    // Read extension configuration.
    $extConf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);

    // Nothing to index when the unit has no fulltext file.
    if (empty($physicalUnit['files'][$extConf['fileGrpFulltext']])) {
        return 0;
    }

    $file = $doc->getFileLocation($physicalUnit['files'][$extConf['fileGrpFulltext']]);

    // Only valid URLs are loaded (or anything on PHP versions before 5.3.3).
    if (!(\TYPO3\CMS\Core\Utility\GeneralUtility::isValidUrl($file) || version_compare(phpversion(), '5.3.3', '<'))) {
        return 1;
    }

    // Set user-agent to identify self when fetching XML data.
    if (!empty($extConf['useragent'])) {
        @ini_set('user_agent', $extConf['useragent']);
    }

    // Silence libxml and disable entity loading while parsing, then restore both.
    $libxmlErrors = libxml_use_internal_errors(TRUE);
    $previousValueOfEntityLoader = libxml_disable_entity_loader(TRUE);
    $xml = simplexml_load_string(file_get_contents($file));
    libxml_disable_entity_loader($previousValueOfEntityLoader);
    libxml_use_internal_errors($libxmlErrors);

    if ($xml === FALSE) {
        return 1;
    }

    // Load class.
    if (!class_exists('Apache_Solr_Document')) {
        require_once \TYPO3\CMS\Core\Utility\GeneralUtility::getFileAbsFileName('EXT:' . self::$extKey . '/lib/SolrPhpClient/Apache/Solr/Document.php');
    }

    // Create new Solr document.
    $solrDoc = new Apache_Solr_Document();

    // Create unique identifier from document's UID and unit's XML ID.
    $solrDoc->setField('id', $doc->uid . $physicalUnit['id']);
    $solrDoc->setField('uid', $doc->uid);
    $solrDoc->setField('pid', $doc->pid);
    $solrDoc->setField('page', $page);

    $hasThumbnail = !empty($physicalUnit['files'][$extConf['fileGrpThumbs']]);
    if ($hasThumbnail) {
        $solrDoc->setField('thumbnail', $doc->getFileLocation($physicalUnit['files'][$extConf['fileGrpThumbs']]));
    }

    $solrDoc->setField('partof', $doc->parentId);
    $solrDoc->setField('root', $doc->rootId);
    $solrDoc->setField('sid', $physicalUnit['id']);
    $solrDoc->setField('toplevel', FALSE);
    $solrDoc->setField('type', $physicalUnit['type'], self::$fields['fieldboost']['type']);
    $solrDoc->setField('fulltext', tx_dlf_alto::getRawText($xml));

    try {
        self::$solr->service->addDocument($solrDoc);
    } catch (Exception $exception) {
        // Outside CLI mode the failure is also reported as a flash message.
        if (!defined('TYPO3_cliMode')) {
            $message = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance(
                'TYPO3\\CMS\\Core\\Messaging\\FlashMessage',
                tx_dlf_helper::getLL('flash.solrException', TRUE) . '<br />' . htmlspecialchars($exception->getMessage()),
                tx_dlf_helper::getLL('flash.error', TRUE),
                \TYPO3\CMS\Core\Messaging\FlashMessage::ERROR,
                TRUE
            );
            tx_dlf_helper::addMessage($message);
        }

        return 1;
    }

    return 0;
}
/**
 * Indexes the fulltext content of a page in Solr.
 *
 * Pages whose component class carries the 'skipFulltext' flag are ignored
 * (returns null). Pages without fulltext components or without content
 * return false. Otherwise one document with all content fields plus
 * 'componentId' is added and committed.
 *
 * @param Kwf_Component_Data $page
 * @param bool $debugOutput echo progress information when true
 * @return bool|null true when the page was indexed, false when there was
 *                   nothing to index, null when the page is skipped
 * @throws Kwf_Exception when Solr does not answer with HTTP status 200
 */
public function indexPage(Kwf_Component_Data $page, $debugOutput = false)
{
    if (Kwc_Abstract::getFlag($page->componentClass, 'skipFulltext')) {
        return;
    }

    $fulltextComponents = $this->getFulltextComponents($page);
    if (!$fulltextComponents) {
        return false;
    }

    if ($debugOutput) {
        echo " *** indexing {$page->componentId} {$page->url}...";
    }

    $contents = $this->getFulltextContentForPage($page, $fulltextComponents);
    unset($fulltextComponents);

    if (!$contents) {
        if ($debugOutput) {
            echo " [no content]\n";
        }
        return false;
    }

    if ($debugOutput) {
        echo " [" . implode(' ', array_keys($contents)) . "]\n";
    }

    $solrDocument = new Apache_Solr_Document();
    foreach ($contents as $field => $text) {
        // date values are sent as UTC timestamps in ISO 8601 notation
        if ($text instanceof Kwf_DateTime) {
            $text = gmdate('Y-m-d\\TH:i:s\\Z', $text->getTimestamp());
        }
        $solrDocument->addField($field, $text);
    }
    $solrDocument->addField('componentId', $page->componentId);

    $response = $this->_getSolrService($page)->addDocument($solrDocument);
    if ($response->getHttpStatus() != 200) {
        throw new Kwf_Exception("addDocument failed");
    }

    $this->_getSolrService($page)->commit();
    $this->_afterIndex($page);

    return true;
}
/**
 * Stores an article (reference) in the database and refreshes everything
 * that hangs off it: the Solr index entry, the version table, the author
 * links, the BHL page range and, for newly added articles, a tweet.
 *
 * Side effect on the argument: a missing $article->genre is set to 'article'.
 *
 * @param object $article  Reference object. Properties read here include
 *                         reference_id, title, secondary_title, genre,
 *                         authors, date, year, url, volume, issue, spage
 *                         and epage.
 * @param int    $PageID   BHL PageID to link the reference to; 0 means none.
 * @param bool   $updating When true an already stored reference is
 *                         overwritten; when false it is left untouched and
 *                         its id is returned immediately.
 * @return int reference_id of the stored (or already existing) article, or 0
 *             when a new article has no non-empty title and is not stored.
 */
function db_store_article($article, $PageID = 0, $updating = false)
{
    global $db;
    global $config;

    $update = false;
    $id = 0;

    // If we are editing an existing reference then we already know its id
    if (isset($article->reference_id)) {
        $id = $article->reference_id;
    } else {
        $id = db_find_article($article);
    }

    if ($id != 0) {
        if ($updating) {
            $update = true;
        } else {
            return $id;
        }
    }

    // Try and trap empty references
    if ($id == 0) {
        $ok = false;
        if (isset($article->title)) {
            $ok = $article->title != '';
        }
        if (!$ok) {
            return 0;
        }
    }

    if (!isset($article->genre)) {
        $article->genre = 'article';
    }

    // Parallel lists of column names and already quoted SQL values
    $keys = array();
    $values = array();

    // Article metadata
    foreach ($article as $k => $v) {
        switch ($k) {
            // Ignore as it's an array
            case 'authors':
                break;

            case 'date':
                $keys[] = 'date';
                $values[] = $db->qstr($v);
                if (!isset($article->year)) {
                    $keys[] = 'year';
                    $values[] = $db->qstr(year_from_date($v));
                }
                break;

            case 'url':
                // Don't store BHL URL here
                if (!preg_match('/^http:\\/\\/(www\\.)?biodiversitylibrary.org\\/page\\/(?<pageid>[0-9]+)/', $v)) {
                    // extract Handle if it exists
                    if (preg_match('/^http:\\/\\/hdl.handle.net\\/(?<hdl>.*)$/', $v, $m)) {
                        $keys[] = 'hdl';
                        $values[] = $db->qstr($m['hdl']);
                    } else {
                        $keys[] = $k;
                        $values[] = $db->qstr($v);
                    }
                }
                break;

            // Things we store as is ('date' is handled by its own case above)
            case 'title':
            case 'secondary_title':
            case 'volume':
            case 'series':
            case 'issue':
            case 'spage':
            case 'epage':
            case 'year':
            case 'issn':
            case 'genre':
            case 'doi':
            case 'hdl':
            case 'lsid':
            case 'oclc':
            case 'pdf':
            case 'abstract':
            case 'pmid':
                $keys[] = $k;
                $values[] = $db->qstr($v);
                break;

            // Things we ignore
            default:
                break;
        }
    }

    // Date
    if (!isset($article->date) && isset($article->year)) {
        $keys[] = 'date';
        $values[] = $db->qstr($article->year . '-00-00');
    }

    // BHL PageID (cast so that only an integer can ever reach the SQL text)
    if ($PageID != 0) {
        $keys[] = 'PageID';
        $values[] = (int) $PageID;
    }

    // SICI
    $s = new Sici();
    $sici = $s->create($article);
    if ($sici != '') {
        $keys[] = 'sici';
        $values[] = $db->qstr($sici);
    }

    if ($update) {
        // Delete author links; they are stored again further down.
        // Don't delete the page range as we may lose plates, etc. outside range
        $sql = 'DELETE FROM rdmp_author_reference_joiner WHERE reference_id = ' . $id;
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }

        // update (updated timestamp will be automatically updated)
        $assignments = array();
        $num_values = count($keys);
        for ($i = 0; $i < $num_values; $i++) {
            $assignments[] = $keys[$i] . '=' . $values[$i];
        }
        $sql = 'UPDATE rdmp_reference SET ' . implode(', ', $assignments) . ' WHERE reference_id=' . $id;
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
    } else {
        // Adding article for first time so add 'created' and 'updated' timestamp
        $keys[] = 'created';
        $values[] = 'NOW()';
        $keys[] = 'updated';
        $values[] = 'NOW()';

        $sql = 'INSERT INTO rdmp_reference (' . implode(",", $keys) . ') VALUES (' . implode(",", $values) . ')';
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
        $id = $db->Insert_ID();

        // Store reference_cluster_id which we can use to group duplicates, by default
        // reference_cluster_id = reference_id
        $sql = 'UPDATE rdmp_reference SET reference_cluster_id=' . $id . ' WHERE reference_id=' . $id;
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
    }

    // Indexing-------------------------------------------------------------------------------------
    // The else branch is the older rdmp_text_index path; it is switched off by the constant.
    if (1) {
        // solr
        // this code is redundant with code in reference.php but I use different objects
        // here and there (doh!). Also once we've added old stuff to solr this is the only place we
        // should be calling solr
        $solr = new Apache_Solr_Service('localhost', '8983', '/solr');
        if (!$solr->ping()) {
            // NOTE(review): this ends the script after the database rows were already written.
            echo 'Solr service not responding.';
            exit;
        }

        // Authors are optional (the author storage below guards them with isset()),
        // so never iterate or count a missing property.
        $article_authors = isset($article->authors) ? $article->authors : array();

        $item = array();
        $item['id'] = 'reference/' . $id;
        $item['title'] = $article->title;
        $item['publication_outlet'] = $article->secondary_title;
        $item['year'] = $article->year;

        // The family name is read from 'surname' here and from 'lastname' in the
        // citation below; each place falls back to the other property so that a
        // name stored under either spelling ends up in both Solr fields.
        $authors = array();
        foreach ($article_authors as $a) {
            $family = isset($a->surname) ? $a->surname : (isset($a->lastname) ? $a->lastname : '');
            $authors[] = $a->forename . ' ' . $family;
        }
        $item['authors'] = $authors;

        // Bibliographic part of the citation: " year title journal volume(issue): spage-epage"
        $citation = '';
        $citation .= ' ' . $article->year;
        $citation .= ' ' . $article->title;
        $citation .= ' ' . $article->secondary_title;
        $citation .= ' ' . $article->volume;
        if (isset($article->issue)) {
            $citation .= '(' . $article->issue . ')';
        }
        $citation .= ':';
        $citation .= ' ';
        $citation .= $article->spage;
        if (isset($article->epage)) {
            $citation .= '-' . $article->epage;
        }

        // Author part of the citation: "A, B and C", cut to "A B et al." after
        // two names when there are more than three authors.
        $text = '';
        $num_authors = count($article_authors);
        $count = 0;
        if ($num_authors > 0) {
            foreach ($article_authors as $author) {
                $family = isset($author->lastname) ? $author->lastname : (isset($author->surname) ? $author->surname : '');
                $text .= $author->forename . ' ' . $family;
                if (isset($author->suffix)) {
                    $text .= ' ' . $author->suffix;
                }
                $count++;
                if ($count == 2 && $num_authors > 3) {
                    $text .= ' et al.';
                    break;
                }
                if ($count < $num_authors - 1) {
                    $text .= ', ';
                } else {
                    if ($count < $num_authors) {
                        $text .= ' and ';
                    }
                }
            }
        }
        $item['citation'] = $text . ' ' . $citation;

        $parts = array();
        $parts[] = $item;

        // add to solr
        $documents = array();
        foreach ($parts as $fields) {
            $part = new Apache_Solr_Document();
            foreach ($fields as $key => $value) {
                if (is_array($value)) {
                    foreach ($value as $datum) {
                        $part->setMultiValue($key, $datum);
                    }
                } else {
                    $part->{$key} = $value;
                }
            }
            $documents[] = $part;
        }

        // Load the documents into the index
        try {
            $solr->addDocuments($documents);
            $solr->commit();
            $solr->optimize();
        } catch (Exception $e) {
            echo $e->getMessage();
        }
    } else {
        $sql = 'DELETE FROM rdmp_text_index WHERE (object_uri=' . $db->qstr($config['web_root'] . 'reference/' . $id) . ')';
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }

        // Only do this if we have a title, as sometimes we don't (e.g. CrossRef lacks metadata)
        if (isset($article->title)) {
            $sql = 'INSERT INTO rdmp_text_index(object_type, object_id, object_uri, object_text) VALUES ("title"' . ', ' . $id . ', ' . $db->qstr($config['web_root'] . 'reference/' . $id) . ', ' . $db->qstr($article->title) . ')';
            $result = $db->Execute($sql);
            if ($result == false) {
                die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
            }
        }
    }

    // Versioning-----------------------------------------------------------------------------------
    // Store this object in version table so we can recover it if we overwrite item.
    // The IP string is quoted by the database layer like every other string value.
    $ip = getip();
    $sql = 'INSERT INTO rdmp_reference_version(reference_id, ip, json) VALUES(' . $id . ', INET_ATON(' . $db->qstr($ip) . '),' . $db->qstr(json_encode($article)) . ')';
    $result = $db->Execute($sql);
    if ($result == false) {
        die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
    }

    // Author(s)------------------------------------------------------------------------------------
    // Store authors and link them to the article
    if (isset($article->authors)) {
        db_store_authors($id, $article->authors);
    }

    // Store page range (only if not updating, otherwise we may lose plates, etc.
    // that aren't in page range)
    if ($PageID != 0 && !$update) {
        $page_range = array();
        if (isset($article->spage) && isset($article->epage)) {
            $page_range = bhl_page_range($PageID, $article->epage - $article->spage + 1);
        } else {
            // No epage, so just get spage (to do: how do we tell user we don't have page range?)
            $page_range = bhl_page_range($PageID, 0);
        }

        $page_order = 0;
        foreach ($page_range as $page) {
            $sql = 'INSERT INTO rdmp_reference_page_joiner (reference_id, PageID, page_order) VALUES (' . $id . ',' . $page . ',' . $page_order++ . ')';
            $result = $db->Execute($sql);
            if ($result == false) {
                die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
            }
        }
    }

    // Tweet----------------------------------------------------------------------------------------
    if (!$update) {
        if ($config['twitter']) {
            $url = $config['web_root'] . 'reference/' . $id . ' ' . '#bhlib'; // url + hashtag
            $url_len = strlen($url);
            $status = '';
            if (isset($article->title)) {
                $status = $article->title;
                $status_len = strlen($status);
                // Room left in a 140 character status after title, separating space and url
                $extra = 140 - $status_len - $url_len - 1;
                if ($extra < 0) {
                    // Shorten the title by the overflow plus one place for the ellipsis
                    $status_len += $extra;
                    $status_len -= 1;
                    $status = substr($status, 0, $status_len);
                    $status .= '…';
                }
            }
            $status .= ' ' . $url;
            tweet($status);
        }
    }

    return $id;
}
<?php

// Unit test: checks what Apache_Solr_Document::getField() reports for a
// multi-valued field and for a single-valued field that was set with a boost.
require dirname(__FILE__) . '/../../bootstrap/unit.php';

$test = new limeade_test(2, limeade_output::get());
$test->diag('document format a document');

// Build a document mixing single-valued and multi-valued fields.
$solrDocument = new Apache_Solr_Document();
$solrDocument->setBoost(10);
$solrDocument->setField('sfl_guid', 'GUID_1234');
$solrDocument->setField('name', 'Thomas Rabaix', 1);
$solrDocument->setMultiValue('skills', 'php');
$solrDocument->setMultiValue('skills', 'symfony');
$solrDocument->addField('skills', 'objective-c');

// All three 'skills' values are expected back in insertion order, without boost.
$expectedSkills = array(
    'name' => 'skills',
    'value' => array('php', 'symfony', 'objective-c'),
    'boost' => false,
);
$test->cmp_ok($solrDocument->getField('skills'), '==', $expectedSkills, '::getField test multivalue setter');

// 'name' is expected back with its value and the boost passed to setField().
$expectedName = array(
    'name' => 'name',
    'value' => 'Thomas Rabaix',
    'boost' => 1,
);
$test->cmp_ok($solrDocument->getField('name'), '==', $expectedName, '::getField test setter');
/**
 * Indexes one resource in Solr, then commits and optimizes the index.
 *
 * Scalar values become single-valued fields; array values contribute one
 * field entry per element. Every 'Metadatum' row is added as a field named
 * after its attribute with a '_t' suffix.
 *
 * @param array $resource resource record; the keys read are 'Resource',
 *                        'User', 'Comment', 'Annotation', 'Keyword',
 *                        'Collection' and 'Metadatum'
 * @return false|null false when $resource is not an array; nothing is
 *                    returned after indexing
 */
public function addResource($resource)
{
    if (!is_array($resource)) {
        return false;
    }

    $record = $resource['Resource'];
    $solrFields = array(
        'id' => $record['id'],
        'sha' => $record['sha'],
        'user' => $resource['User']['name'],
        'filetype' => $record['mime_type'],
        'filename' => $record['file_name'],
        'type' => $record['type'],
        'title' => $record['title'],
        'public' => $record['public'],
        'modified' => $this->_formatDate($record['modified']),
        'created' => $this->_formatDate($record['created']),
        'comment' => \_\pluck($resource['Comment'], 'content'),
        'annotation' => \_\pluck($resource['Annotation'], 'caption'),
        'keyword' => \_\pluck($resource['Keyword'], 'keyword'),
        'collection' => $resource['Collection'] ?: array(),
    );

    $solrDocument = new \Apache_Solr_Document();
    foreach ($solrFields as $fieldName => $fieldValue) {
        if (!is_array($fieldValue)) {
            $solrDocument->{$fieldName} = $fieldValue;
            continue;
        }
        foreach ($fieldValue as $entry) {
            $solrDocument->addField($fieldName, $entry);
        }
    }

    foreach ($resource['Metadatum'] as $metadatum) {
        $solrDocument->addField($metadatum['attribute'] . '_t', $metadatum['value']);
    }

    $this->solr->addDocument($solrDocument);
    $this->solr->commit();
    $this->solr->optimize();
}
/**
 * Builds the Apache_Solr_Document that represents a record for one addon.
 *
 * The document id is "<addon table>_<record id>". Every value of every
 * addon field is added under its Solr name; values of the addon's title
 * field are additionally added as 'title'. Tags and the result type are
 * added when the addon asks for them.
 *
 * @param Omeka_Record           $record The record to index.
 * @param SolrSearch_Addon_Addon $addon  The addon configuration controlling
 *                                       how the record is indexed.
 *
 * @return Apache_Solr_Document
 * @author Eric Rochester <*****@*****.**>
 */
public function indexRecord($record, $addon)
{
    $solrDoc = new Apache_Solr_Document();
    $solrDoc->id = $addon->table . '_' . $record->id;
    $solrDoc->addField('model', $addon->table);
    $solrDoc->addField('modelid', $record->id);

    $titleField = $addon->getTitleField();

    foreach ($addon->fields as $field) {
        $solrName = $this->makeSolrName($addon, $field->name);

        // Fields without a remote definition are read from the record itself.
        $values = is_null($field->remote)
            ? $this->getLocalValue($record, $field)
            : $this->getRemoteValue($record, $field);

        $isTitleField = !is_null($titleField) && $titleField->name === $field->name;

        foreach ($values as $singleValue) {
            $solrDoc->addField($solrName, $singleValue);
            if ($isTitleField) {
                $solrDoc->addField('title', $singleValue);
            }
        }
    }

    if ($addon->tagged) {
        foreach ($record->getTags() as $tag) {
            $solrDoc->addField('tag', $tag->name);
        }
    }

    if ($addon->resultType) {
        $solrDoc->addField('resulttype', $addon->resultType);
    }

    return $solrDoc;
}
/**
 * Takes a search result document and processes its fields according to the
 * instructions configured in TS. Currently available instructions are
 *   * timestamp - converts a date field into a unix timestamp
 *   * serialize - uses serialize() to encode multivalue fields which then
 *                 can be put out using the MULTIVALUE view helper
 *   * skip - skips the whole field so that it is not available in the
 *            result, useful for the spell field f.e.
 * A field without a configured instruction is copied to the resulting array
 * unchanged. Every value that is kept is passed through
 * Template::escapeMarkers() before it is stored.
 *
 * @param \Apache_Solr_Document $document the Apache_Solr_Document result document
 * @return array An array with field values processed like defined in TS,
 *               keyed by field name
 */
protected function processDocumentFieldsToArray(\Apache_Solr_Document $document)
{
    $processingInstructions = $this->configuration->getSearchResultsFieldProcessingInstructionsConfiguration();
    $availableFields = $document->getFieldNames();
    $result = array();

    foreach ($availableFields as $fieldName) {
        // Fields without a configured instruction are the default case; look
        // the key up defensively so they do not raise an "undefined index"
        // notice/warning on every document.
        $processingInstruction = isset($processingInstructions[$fieldName])
            ? $processingInstructions[$fieldName]
            : null;

        // TODO switch to field processors
        // TODO allow to have multiple (comma-separated) instructions for each field
        switch ($processingInstruction) {
            case 'timestamp':
                $processedFieldValue = Util::isoToTimestamp($document->{$fieldName});
                break;
            case 'serialize':
                if (!empty($document->{$fieldName})) {
                    $processedFieldValue = serialize($document->{$fieldName});
                } else {
                    $processedFieldValue = '';
                }
                break;
            case 'skip':
                // Leave the switch and move on to the next field.
                continue 2;
            default:
                $processedFieldValue = $document->{$fieldName};
        }

        // escape markers in document fields
        // TODO remove after switching to fluid templates
        $processedFieldValue = Template::escapeMarkers($processedFieldValue);

        $result[$fieldName] = $processedFieldValue;
    }

    return $result;
}