Esempio n. 1
0
 /**
  * Adds fields to the document as defined in $indexingConfiguration.
  *
  * Configuration entries whose value is an array are skipped. For every other
  * entry the value is resolved through resolveFieldValue(): array results are
  * appended value by value, scalar results are set as a single value unless
  * they are an empty string or null.
  *
  * @param \Apache_Solr_Document $document base document to add fields to
  * @param array $indexingConfiguration Indexing configuration / mapping
  * @param array $data Record data
  * @return \Apache_Solr_Document Modified document with added fields
  * @throws InvalidFieldNameException if the configuration targets a field that must not be overridden
  */
 protected function addDocumentFieldsFromTyposcript(\Apache_Solr_Document $document, array $indexingConfiguration, array $data)
 {
     // mapping of record fields => solr document fields, resolving cObj
     foreach ($indexingConfiguration as $solrFieldName => $recordFieldName) {
         if (is_array($recordFieldName)) {
             // configuration for a content object, skipping
             continue;
         }
         if (!self::isAllowedToOverrideField($solrFieldName)) {
             throw new InvalidFieldNameException('Must not overwrite field ' . $solrFieldName . '.', 1435441863);
         }
         $fieldValue = $this->resolveFieldValue($indexingConfiguration, $solrFieldName, $data);
         if (is_array($fieldValue)) {
             // multi value
             foreach ($fieldValue as $multiValue) {
                 $document->addField($solrFieldName, $multiValue);
             }
             continue;
         }
         // single value: empty strings and null are not written to the document
         if ($fieldValue !== '' && $fieldValue !== null) {
             $document->setField($solrFieldName, $fieldValue);
         }
     }
     return $document;
 }
 /**
  * Creates a document carrying an "id" field and a matching "Item <id>" title.
  *
  * @param mixed $id value stored in the "id" field and appended to the title
  * @return Apache_Solr_Document
  */
 protected function getMockDocument($id)
 {
     $mockDocument = new Apache_Solr_Document();
     $mockDocument->setField('id', $id);
     $mockDocument->setField('title', 'Item ' . $id);

     return $mockDocument;
 }
Esempio n. 3
0
 /**
  * Builds a Solr document from a field map and hands it to the Solr service.
  *
  * Array values are added one at a time via setMultiValue(); every other
  * value is assigned through the document's property syntax. Values are
  * passed on unchanged.
  *
  * @param array $data map of field name => value (or array of values)
  * @return mixed the result of the service's addDocument() call, or the
  *               exception message (string) when that call throws
  */
 public function addDocument($data)
 {
     $document = new Apache_Solr_Document();
     foreach ($data as $key => $value) {
         if (is_array($value)) {
             foreach ($value as $datum) {
                 $document->setMultiValue($key, $datum);
             }
         } else {
             $document->{$key} = $value;
         }
     }
     try {
         // No commit is issued here.
         return $this->_solrService->addDocument($document);
     } catch (Exception $e) {
         return $e->getMessage();
     }
 }
Esempio n. 4
0
 /**
  * Queues one or more documents in $this->documents.
  *
  * Accepts either a ready-made Apache_Solr_Document or an array whose
  * elements are Apache_Solr_Document instances or field maps
  * (field name => value or array of values). Field maps are converted
  * into new documents; array values become multi-valued fields.
  * Any other argument type triggers an error via trigger_error() and
  * queues nothing.
  *
  * @param Apache_Solr_Document|array $parts
  *
  * @return SP_Controller_Action_Helper_Solr
  */
 public function pushDocuments($parts)
 {
     $this->_setSolrService();
     if ($parts instanceof Apache_Solr_Document) {
         $this->documents[] = $parts;
         return $this;
     }
     if (!is_array($parts)) {
         trigger_error('the parameter $parts must be an object of Apache_Solr_Document or an array');
         return $this;
     }
     foreach ($parts as $fields) {
         if ($fields instanceof Apache_Solr_Document) {
             $this->documents[] = $fields;
             continue;
         }
         // field map: build a new document from it
         $part = new Apache_Solr_Document();
         foreach ($fields as $key => $value) {
             if (is_array($value)) {
                 foreach ($value as $datum) {
                     $part->setMultiValue($key, $datum);
                 }
             } else {
                 $part->setField($key, $value);
             }
         }
         $this->documents[] = $part;
     }
     return $this;
 }
Esempio n. 5
0
 /**
  * Converts a list of field maps into Solr documents and sends them in one call.
  *
  * @param array $datas list of field maps (field name => value or array of values)
  * @return mixed the result of the service's addDocuments() call, or the
  *               exception message (string) when that call throws
  */
 public function addDocuments($datas)
 {
     $batch = array();
     foreach ($datas as $fieldMap) {
         $solrDocument = new Apache_Solr_Document();
         foreach ($fieldMap as $fieldName => $fieldValue) {
             if (!is_array($fieldValue)) {
                 $solrDocument->{$fieldName} = $fieldValue;
                 continue;
             }
             // array values are added one at a time
             foreach ($fieldValue as $singleValue) {
                 $solrDocument->setMultiValue($fieldName, $singleValue);
             }
         }
         $batch[] = $solrDocument;
     }

     try {
         // no commit is issued here
         return $this->_solrService->addDocuments($batch);
     } catch (Exception $exception) {
         return $exception->getMessage();
     }
 }
 /**
  * Applies the configured processing instruction to each configured field.
  *
  * Fields missing from the document (getField() returns FALSE) are left
  * alone. Single values are wrapped in an array for processing and
  * unwrapped again before being written back with setField(). Instructions
  * other than "timestampToIsoDate" and "uppercase" leave the value as is.
  *
  * @param	Apache_Solr_Document	$document
  * @param	array	$processingConfiguration	map of field name => instruction
  */
 public function processDocument(Apache_Solr_Document $document, array $processingConfiguration)
 {
     foreach ($processingConfiguration as $fieldName => $instruction) {
         $fieldInformation = $document->getField($fieldName);
         if ($fieldInformation === FALSE) {
             continue;
         }

         $values = $fieldInformation['value'];
         // processors work on arrays, so single values are wrapped temporarily
         $wasSingleValue = !is_array($values);
         if ($wasSingleValue) {
             $values = array($values);
         }

         switch ($instruction) {
             case 'timestampToIsoDate':
                 $values = t3lib_div::makeInstance('tx_solr_fieldprocessor_TimestampToIsoDate')->process($values);
                 break;
             case 'uppercase':
                 $values = array_map('strtoupper', $values);
                 break;
         }

         // unwrap again if the field was single valued to begin with
         $document->setField($fieldName, $wasSingleValue ? $values[0] : $values);
     }
 }
Esempio n. 7
0
 /**
  * Builds a Solr document for an entry from the self::$solrFields definitions.
  *
  * Each definition supplies 'type', 'phpName' (suffix of the getter called on
  * the entry) and 'solrName' (target field). Getters of "date" fields are
  * called with a "%Y-%m-%dT%H:%M:%SZ" format argument. Values of "array"
  * fields are split on commas and added one by one; an empty value adds
  * nothing. All other values are added as returned by the getter.
  *
  * @param entry $entry
  * @return Apache_Solr_Document
  */
 public function createEntryDocument(entry $entry)
 {
     $document = new Apache_Solr_Document();
     $document->plugins_data = '';
     foreach (self::$solrFields as $solrField) {
         $fieldType = $solrField['type'];
         $getter = array($entry, "get" . $solrField['phpName']);
         if ($fieldType == "date") {
             $value = call_user_func($getter, "%Y-%m-%dT%H:%M:%SZ");
         } else {
             $value = call_user_func($getter);
         }
         $solrName = $solrField['solrName'];
         switch ($fieldType) {
             case "array":
                 // loose comparison on purpose: null and '' both mean "no values"
                 if ($value != '') {
                     foreach (explode(",", $value) as $singleValue) {
                         $document->addField($solrName, $singleValue);
                     }
                 }
                 break;
             default:
                 $document->addField($solrName, $value);
         }
     }
     return $document;
 }
Esempio n. 8
0
 /**
  * Builds a Solr document from a field map and passes it to the service.
  *
  * @param array $doc map of field name => value or array of values; each
  *                   array element is added as a separate value
  */
 public function addDocument($doc)
 {
     $solrDocument = new Apache_Solr_Document();
     foreach ($doc as $fieldName => $fieldValue) {
         // treat scalars as a one-element list so both cases share one loop
         $valuesToAdd = is_array($fieldValue) ? $fieldValue : array($fieldValue);
         foreach ($valuesToAdd as $singleValue) {
             $solrDocument->addField($fieldName, $singleValue);
         }
     }
     $this->service->addDocument($solrDocument);
 }
 /**
  * Builds a populated Apache_Solr_Document for the given record.
  *
  * Besides the identification fields, the document receives the record's
  * Dublin Core title, the fields indexed by indexItem(), its tags, the
  * collection title and item type name (when present), the featured flag
  * and the indexItem() output of every attached file.
  *
  * @param Omeka_Record $item The record to index.
  *
  * @return Apache_Solr_Document
  * @author Eric Rochester <*****@*****.**>
  **/
 public static function itemToDocument($item)
 {
     $fields = get_db()->getTable('SolrSearchField');
     $doc = new Apache_Solr_Document();

     // Identification
     $doc->setField('id', 'Item_' . $item->id);
     $doc->setField('resulttype', 'Item');
     $doc->setField('model', 'Item');
     $doc->setField('modelid', $item->id);

     // Public / private status of the item
     $doc->setField('public', $item->public);

     // Title
     $doc->setField('title', metadata($item, array('Dublin Core', 'Title')));

     // Elements
     self::indexItem($fields, $item, $doc);

     // Tags
     foreach ($item->getTags() as $tag) {
         $doc->setMultiValue('tag', $tag->name);
     }

     // Collection (only when the item belongs to one)
     $collection = $item->getCollection();
     if ($collection) {
         $doc->collection = metadata($collection, array('Dublin Core', 'Title'));
     }

     // Item type (only when one is assigned)
     $itemType = $item->getItemType();
     if ($itemType) {
         $doc->itemtype = $itemType->name;
     }

     $doc->featured = (bool) $item->featured;

     // File metadata
     foreach ($item->getFiles() as $file) {
         self::indexItem($fields, $file, $doc);
     }

     return $doc;
 }
 /**
  * Builds an Apache_Solr_Document to pass to Apache_Solr_Service.
  *
  * @param array $document map of field name => array with a 'value' entry
  *                        and an optional 'boost' entry
  * @return Apache_Solr_Document
  **/
 public function buildDocument(array $document)
 {
     $solrDocument = new Apache_Solr_Document();
     foreach ($document as $fieldName => $field) {
         $rawValue = $field['value'];
         // values are always handed over as arrays
         $values = is_array($rawValue) ? $rawValue : array($rawValue);

         if (!isset($field['boost'])) {
             $solrDocument->setField($fieldName, $values);
             continue;
         }
         $solrDocument->setField($fieldName, $values, $field['boost']);
     }

     return $solrDocument;
 }
Esempio n. 11
0
 /**
  * Loads one ta_plato record with its joined data and tags and writes it to Solr.
  *
  * Two queries are executed: the record itself with its left-joined tables
  * (grouped by ta_plato.in_id) and the record's tags. If the Solr server
  * answers the ping, a document is built from the first result row, added
  * and committed. When the ping fails nothing is written.
  *
  * @param mixed $id   value matched against ta_plato.in_id and used as Solr id
  * @param mixed $caso when this is exactly the integer 1, the existing Solr
  *                    document with this id is NOT deleted before adding
  */
 public function cromSolar($id, $caso = null)
 {
     $adapter = $this->tableGateway->getAdapter();
     $sql = new Sql($adapter);

     // Main record with its joined data.
     $selecttot = $sql->select()
         ->from('ta_plato')
         ->join(array('c' => 'ta_comentario'), 'c.ta_plato_in_id=ta_plato.in_id', array('cantidad' => new \Zend\Db\Sql\Expression('COUNT(c.in_id)')), 'left')
         ->join('ta_tipo_plato', 'ta_plato.ta_tipo_plato_in_id=ta_tipo_plato.in_id ', array('tipo_plato_nombre' => 'va_nombre'), 'left')
         ->join(array('pl' => 'ta_plato_has_ta_local'), 'pl.Ta_plato_in_id = ta_plato.in_id', array(), 'left')
         ->join(array('tl' => 'ta_local'), 'tl.in_id = pl.Ta_local_in_id', array('latitud' => 'de_latitud', 'longitud' => 'de_longitud', 'direccion' => 'va_direccion', 'telefono' => 'va_telefono'), 'left')
         ->join(array('tr' => 'ta_restaurante'), 'tr.in_id = tl.ta_restaurante_in_id', array('restaurant_nombre' => 'va_nombre', 'restaurant_estado' => 'en_estado'), 'left')
         ->join(array('tc' => 'ta_tipo_comida'), 'tc.in_id = tr.Ta_tipo_comida_in_id', array('nombre_tipo_comida' => 'va_nombre_tipo'), 'left')
         ->join(array('tu' => 'ta_ubigeo'), 'tu.in_id = tl.ta_ubigeo_in_id', array('distrito' => 'ch_distrito', 'departamento' => 'ch_departamento'), 'left')
         ->where(array('ta_plato.in_id' => $id));
     $selecttot->group('ta_plato.in_id');
     $plato = $adapter
         ->query($sql->getSqlStringForSqlObject($selecttot), $adapter::QUERY_MODE_EXECUTE)
         ->toArray();

     // Tags of the record.
     $tagSelect = $sql->select()
         ->from('ta_plato')
         ->join(array('tpt' => 'ta_plato_has_ta_tag'), 'tpt.Ta_plato_in_id = ta_plato.in_id', array('tag_id' => 'ta_tag_in_id'), 'left')
         ->join(array('tt' => 'ta_tag'), 'tt.in_id =tpt.ta_tag_in_id', array('tag' => 'va_nombre'), 'left')
         ->where(array('ta_plato.in_id' => $id));
     $tagRows = $adapter
         ->query($sql->getSqlStringForSqlObject($tagSelect), $adapter::QUERY_MODE_EXECUTE)
         ->toArray();

     $solr = \Classes\Solr::getInstance()->getSolr();
     if (!$solr->ping()) {
         return;
     }
     if ($caso !== 1) {
         $solr->deleteByQuery('id:' . $id);
     }

     // Solr field => result column; split in two so the tags keep their
     // position between "departamento" and "distrito".
     $fieldsBeforeTags = array(
         'name' => 'va_nombre',
         'tx_descripcion' => 'tx_descripcion',
         'va_precio' => 'va_precio',
         'en_estado' => 'en_estado',
         'plato_tipo' => 'tipo_plato_nombre',
         'va_direccion' => 'direccion',
         'restaurante' => 'restaurant_nombre',
         'tipo_comida' => 'nombre_tipo_comida',
         'en_destaque' => 'en_destaque',
         'va_telefono' => 'telefono',
         'latitud' => 'latitud',
         'longitud' => 'longitud',
         'departamento' => 'departamento',
     );
     $fieldsAfterTags = array(
         'distrito' => 'distrito',
         'va_imagen' => 'va_imagen',
         'comentarios' => 'cantidad',
         'restaurant_estado' => 'restaurant_estado',
         'puntuacion' => 'Ta_puntaje_in_id',
     );

     $document = new \Apache_Solr_Document();
     $document->id = $id;
     foreach ($fieldsBeforeTags as $solrField => $column) {
         $document->{$solrField} = $plato[0][$column];
     }
     foreach ($tagRows as $tagRow) {
         $document->setMultiValue('tag', $tagRow['tag']);
     }
     foreach ($fieldsAfterTags as $solrField => $column) {
         $document->{$solrField} = $plato[0][$column];
     }

     $solr->addDocument($document);
     $solr->commit();
 }
Esempio n. 12
0
 /**
  * Applies the configured processing instruction to each configured field.
  *
  * Fields missing from the document (getField() returns FALSE) are left
  * alone. Single values are wrapped in an array for processing and
  * unwrapped again before being written back with setField(). Unknown
  * instructions leave the value unchanged.
  *
  * @param Apache_Solr_Document $document
  * @param array $processingConfiguration map of field name => instruction
  */
 public function processDocument(Apache_Solr_Document $document, array $processingConfiguration)
 {
     foreach ($processingConfiguration as $fieldName => $instruction) {
         $fieldInformation = $document->getField($fieldName);
         if ($fieldInformation === FALSE) {
             continue;
         }

         $values = $fieldInformation['value'];
         // processors work on arrays, so single values are wrapped temporarily
         $wasSingleValue = !is_array($values);
         if ($wasSingleValue) {
             $values = array($values);
         }

         // pick the processor class for the instruction, if it has one
         $processorClass = NULL;
         switch ($instruction) {
             case 'timestampToUtcIsoDate':
                 $processorClass = 'Tx_Solr_FieldProcessor_TimestampToUtcIsoDate';
                 break;
             case 'timestampToIsoDate':
                 $processorClass = 'Tx_Solr_FieldProcessor_TimestampToIsoDate';
                 break;
             case 'pathToHierarchy':
                 $processorClass = 'Tx_Solr_FieldProcessor_PathToHierarchy';
                 break;
             case 'pageUidToHierarchy':
                 $processorClass = 'Tx_Solr_FieldProcessor_PageUidToHierarchy';
                 break;
             case 'categoryUidToHierarchy':
                 $processorClass = 'Tx_Solr_FieldProcessor_CategoryUidToHierarchy';
                 break;
             case 'uppercase':
                 $values = array_map('strtoupper', $values);
                 break;
         }
         if ($processorClass !== NULL) {
             $values = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance($processorClass)->process($values);
         }

         // unwrap again if the field was single valued to begin with
         $document->setField($fieldName, $wasSingleValue ? $values[0] : $values);
     }
 }
Esempio n. 13
0
 /**
  * Applies the configured processing instruction to each configured field.
  *
  * Fields missing from the document (getField() returns false) are left
  * alone. Single values are wrapped in an array for processing and
  * unwrapped again before being written back with setField(). Unknown
  * instructions leave the value unchanged.
  *
  * @param \Apache_Solr_Document $document
  * @param array $processingConfiguration map of field name => instruction
  */
 public function processDocument(\Apache_Solr_Document $document, array $processingConfiguration)
 {
     foreach ($processingConfiguration as $fieldName => $instruction) {
         $fieldInformation = $document->getField($fieldName);
         if ($fieldInformation === false) {
             continue;
         }

         $values = $fieldInformation['value'];
         // processors work on arrays, so single values are wrapped temporarily
         $wasSingleValue = !is_array($values);
         if ($wasSingleValue) {
             $values = array($values);
         }

         // pick the processor class for the instruction, if it has one
         $processorClass = null;
         switch ($instruction) {
             case 'timestampToUtcIsoDate':
                 $processorClass = 'ApacheSolrForTypo3\\Solr\\FieldProcessor\\TimestampToUtcIsoDate';
                 break;
             case 'timestampToIsoDate':
                 $processorClass = 'ApacheSolrForTypo3\\Solr\\FieldProcessor\\TimestampToIsoDate';
                 break;
             case 'pathToHierarchy':
                 $processorClass = 'ApacheSolrForTypo3\\Solr\\FieldProcessor\\PathToHierarchy';
                 break;
             case 'pageUidToHierarchy':
                 $processorClass = 'ApacheSolrForTypo3\\Solr\\FieldProcessor\\PageUidToHierarchy';
                 break;
             case 'categoryUidToHierarchy':
                 $processorClass = 'ApacheSolrForTypo3\\Solr\\FieldProcessor\\CategoryUidToHierarchy';
                 break;
             case 'uppercase':
                 $values = array_map('strtoupper', $values);
                 break;
         }
         if ($processorClass !== null) {
             $values = GeneralUtility::makeInstance($processorClass)->process($values);
         }

         // unwrap again if the field was single valued to begin with
         $document->setField($fieldName, $wasSingleValue ? $values[0] : $values);
     }
 }
Esempio n. 14
0
 /**
  * Forwards the call to the parent's __call() and optionally suppresses
  * runtime exceptions.
  *
  * @param string $name
  * @param array $arguments
  * @return mixed the parent's result, or null when a \RuntimeException was
  *               caught and $this->throwExceptions is not set
  * @throws \Exception any other exception raised by the parent call
  * @throws \RuntimeException re-thrown when $this->throwExceptions is set
  */
 public function __call($name, $arguments)
 {
     try {
         $result = parent::__call($name, $arguments);
     } catch (\RuntimeException $runtimeException) {
         if ($this->throwExceptions) {
             throw $runtimeException;
         }
         // exception suppressed: behave as if the call returned nothing
         $result = null;
     }

     return $result;
 }
Esempio n. 15
0
 /**
  * A multi-valued field must have every value converted by the
  * "timestampToIsoDate" instruction.
  *
  * @test
  */
 public function transformsUnixTimestampToIsoDateOnMultiValuedField()
 {
     // expected to be rendered as 2010-01-01T12:00:00Z
     $this->documentMock->addField('dateField', '1262343600');
     // one second later, expected to be rendered as 2010-01-01T12:00:01Z
     $this->documentMock->addField('dateField', '1262343601');
     $configuration = array('dateField' => 'timestampToIsoDate');
     $this->service->processDocument($this->documentMock, $configuration);
     $value = $this->documentMock->getField('dateField');
     // expected value first, actual second, so failure output is labelled correctly
     $this->assertEquals(array('2010-01-01T12:00:00Z', '2010-01-01T12:00:01Z'), $value['value'], 'field was not processed with timestampToIsoDate');
 }
Esempio n. 16
0
 /**
  * Adds fields to the document as defined in $indexingConfiguration.
  *
  * Configuration entries whose value is an array are skipped. For every
  * other entry the value is resolved through resolveFieldValue(): array
  * results are appended value by value, anything else is set as a single
  * value.
  *
  * @param Apache_Solr_Document $document base document to add fields to
  * @param array $indexingConfiguration Indexing configuration / mapping
  * @param array $data Record data
  * @return Apache_Solr_Document Modified document with added fields
  */
 protected function addDocumentFieldsFromTyposcript(Apache_Solr_Document $document, array $indexingConfiguration, array $data)
 {
     foreach ($indexingConfiguration as $solrFieldName => $recordFieldName) {
         // array entries are content object configuration, not field mappings
         if (is_array($recordFieldName)) {
             continue;
         }

         $resolvedValue = $this->resolveFieldValue($indexingConfiguration, $solrFieldName, $data);
         if (!is_array($resolvedValue)) {
             $document->setField($solrFieldName, $resolvedValue);
             continue;
         }

         // multi value: append each value separately
         foreach ($resolvedValue as $singleValue) {
             $document->addField($solrFieldName, $singleValue);
         }
     }

     return $document;
 }
 /**
  * Builds a Solr document for an object and adds it to the index service.
  *
  * The document always carries "_documentid", "ID", "ClassName" and one
  * "ClassHierarchy" value per class returned by
  * SearchIntrospection::hierarchy(). After that come the configured fields
  * whose 'base' equals $base and two custom fields ("Title", "LastEdited").
  *
  * @param DataObject $object object to index
  * @param string $base base class the configured fields are matched against
  * @param array $options must contain the key 'include_children'
  * @return Apache_Solr_Document the document that was handed to the service
  */
 protected function _addAs($object, $base, $options)
 {
     $includeChildren = $options['include_children'];
     $solrDocument = new Apache_Solr_Document();

     // Always present fields
     $solrDocument->setField('_documentid', $this->getDocumentID($object, $base, $includeChildren));
     $solrDocument->setField('ID', $object->ID);
     $solrDocument->setField('ClassName', $object->ClassName);
     $classHierarchy = SearchIntrospection::hierarchy(get_class($object), false);
     foreach ($classHierarchy as $className) {
         $solrDocument->addField('ClassHierarchy', $className);
     }

     // Add the user-specified fields
     foreach ($this->getFieldsIterator() as $name => $field) {
         if ($field['base'] != $base) {
             continue;
         }
         $this->_addField($solrDocument, $object, $field);
     }

     // CUSTOM Duplicate index combined fields ("Title" rather than
     // "SiteTree_Title").
     //
     // This allows us to sort on these fields without deeper architectural
     // changes to the fulltextsearch module. Note: We can't use <copyField>
     // for this purpose because it only writes into multiValue=true
     // fields, and those can't be (reliably) sorted on.
     $titleField = $this->getCustomPropertyFieldData('Title', $object);
     $this->_addField($solrDocument, $object, $titleField);
     $lastEditedField = $this->getCustomPropertyFieldData('LastEdited', $object, 'SSDatetime');
     $this->_addField($solrDocument, $object, $lastEditedField);

     $this->getService()->addDocument($solrDocument);

     return $solrDocument;
 }
Esempio n. 18
0
 /**
  * Build a Solr document for a specific file.
  *
  * The host is taken from the store's web base URL. "productId" is always 0
  * and "sku" repeats the document type.
  *
  * @param integer $storeId Store ID the file belongs to/where it is linked on a page
  * @param Asm_Solr_Model_Indexqueue_File $file The file to index
  * @return Apache_Solr_Document
  */
 protected function buildFileDocument($storeId, Asm_Solr_Model_Indexqueue_File $file)
 {
     $helper = Mage::helper('solr');
     $host = parse_url(Mage::getBaseUrl(Mage_Core_Model_Store::URL_TYPE_WEB), PHP_URL_HOST);
     $document = new Apache_Solr_Document();

     // field name => value, in the order the fields are written
     $fieldValues = array(
         'appKey' => 'Asm_Solr',
         'type' => 'solr/indexqueue_file',
         'id' => $helper->getFileDocumentId($file->getId()),
         'site' => $host,
         'siteHash' => $helper->getSiteHashForDomain($host),
         'storeId' => $storeId,
         'changed' => $helper->dateToIso($file->getFileLastChangedTime()),
         'productId' => 0,
         'sku' => 'solr/indexqueue_file',
         'title' => $file->getName(),
         'content' => $file->getContent(),
         'url' => $file->getUrl(),
     );
     foreach ($fieldValues as $fieldName => $fieldValue) {
         $document->setField($fieldName, $fieldValue);
     }

     return $document;
 }
Esempio n. 19
0
 /**
  * clear() must remove the document boost and all fields.
  */
 public function testClearReturnsDocumentToDefaultState()
 {
     // give the document a boost and a field so clear() has something to reset
     $this->_fixture->setBoost(0.5);
     $this->_fixture->someField = "some value";

     $this->_fixture->clear();

     // document boost should now be false
     $this->assertFalse($this->_fixture->getBoost());

     // names, values and boosts of the fields should all be empty
     foreach (array('getFieldNames', 'getFieldValues', 'getFieldBoosts') as $getter) {
         $this->assertEquals(0, count($this->_fixture->{$getter}()));
     }

     // iterating the document should yield nothing
     $this->assertEquals(0, iterator_count($this->_fixture));
 }
 /**
  * Create an XML fragment from a {@link Apache_Solr_Document} instance appropriate for use inside a Solr add call
  *
  * Fields whose escaped name is empty (or "0") are skipped. A field boost
  * is written only on the first <field> element of a field. Values that
  * are not valid UTF-8 are converted from ISO-8859-1 before escaping.
  *
  * @param Apache_Solr_Document $document
  * @return string
  */
 protected function _documentToXmlFragment(Apache_Solr_Document $document)
 {
     $xml = '<doc';
     if ($document->getBoost() !== false) {
         $xml .= ' boost="' . $document->getBoost() . '"';
     }
     $xml .= '>';
     foreach ($document as $key => $value) {
         // look the boost up with the raw field name; the escaped name is
         // only meant for the XML attribute
         $fieldBoost = $document->getFieldBoost($key);
         $key = htmlspecialchars($key, ENT_QUOTES, 'UTF-8');
         if (!$key) {
             continue;
         }
         // single values are handled as a one-element list
         $fieldValues = is_array($value) ? $value : array($value);
         foreach ($fieldValues as $fieldValue) {
             $xml .= '<field name="' . $key . '"';
             if ($fieldBoost !== false) {
                 $xml .= ' boost="' . $fieldBoost . '"';
                 // only set the boost for the first field in the set
                 $fieldBoost = false;
             }
             if (!mb_check_encoding($fieldValue, 'UTF-8')) {
                 // same conversion utf8_encode() performed (deprecated since PHP 8.2)
                 $fieldValue = mb_convert_encoding($fieldValue, 'UTF-8', 'ISO-8859-1');
             }
             $xml .= '>' . htmlspecialchars($fieldValue, ENT_NOQUOTES, 'UTF-8') . '</field>';
         }
     }
     $xml .= '</doc>';
     return $xml;
 }
Esempio n. 21
0
     			{
     				$authors[] = $a->forename . ' ' . $a->surname;
     			}
     			$item['authors'] = $authors;
     			$item['citation'] = reference_authors_to_text_string($reference)
     				. ' ' . $reference->year 
     				. ' ' . $reference->title
     				. ' ' . reference_to_citation_text_string($reference);
     */
     print_r($item);
     $parts[] = $item;
 }
 // Debug output: dump the collected $parts structure.
 print_r($parts);
 // Convert every entry of $parts into an Apache_Solr_Document and collect
 // the results in $documents.
 $documents = array();
 foreach ($parts as $item => $fields) {
     $part = new Apache_Solr_Document();
     foreach ($fields as $key => $value) {
         if (is_array($value)) {
             // array values are added one at a time via setMultiValue()
             foreach ($value as $datum) {
                 $part->setMultiValue($key, $datum);
             }
         } else {
             // any other value is assigned through the document's property syntax
             $part->{$key} = $value;
         }
     }
     $documents[] = $part;
 }
 //
 //
 // Load the documents into the index
 //
Esempio n. 22
0
 /**
  * Create an XML fragment from a {@link Apache_Solr_Document} instance appropriate for use inside a Solr add call
  *
  * A field boost is written only on the first <field> element of a field.
  * The finished fragment is passed through _stripCtrlChars() before it is
  * returned.
  *
  * @param Apache_Solr_Document $document
  * @return string
  */
 protected function _documentToXmlFragment(Apache_Solr_Document $document)
 {
     $xml = '<doc';
     if ($document->getBoost() !== false) {
         $xml .= ' boost="' . $document->getBoost() . '"';
     }
     $xml .= '>';
     foreach ($document as $key => $value) {
         // look the boost up with the raw field name; the escaped name is
         // only meant for the XML attribute
         $fieldBoost = $document->getFieldBoost($key);
         $key = htmlspecialchars($key, ENT_QUOTES, 'UTF-8');
         // single values are handled as a one-element list
         $fieldValues = is_array($value) ? $value : array($value);
         foreach ($fieldValues as $fieldValue) {
             $xml .= '<field name="' . $key . '"';
             if ($fieldBoost !== false) {
                 $xml .= ' boost="' . $fieldBoost . '"';
                 // only set the boost for the first field in the set
                 $fieldBoost = false;
             }
             $xml .= '>' . htmlspecialchars($fieldValue, ENT_NOQUOTES, 'UTF-8') . '</field>';
         }
     }
     $xml .= '</doc>';
     // replace any control characters to avoid Solr XML parser exception
     return $this->_stripCtrlChars($xml);
 }
 /**
  * Build a Solr document for a given page.
  *
  * The host is taken from the store's web base URL. "productId" is always 0
  * and "sku" repeats the document type. The page's meta keywords are split
  * on commas via the helper's trimExplode().
  *
  * @param integer $storeId Store ID
  * @param Mage_Cms_Model_Page $page Page instance
  * @return Apache_Solr_Document
  */
 protected function buildPageDocument($storeId, $page)
 {
     $helper = Mage::helper('solr');
     $host = parse_url(Mage::getBaseUrl(Mage_Core_Model_Store::URL_TYPE_WEB), PHP_URL_HOST);
     $document = new Apache_Solr_Document();

     // field name => value, in the order the fields are written
     $fieldValues = array(
         'appKey' => 'Asm_Solr',
         'type' => 'cms/page',
         'id' => $helper->getPageDocumentId($page->getId()),
         'site' => $host,
         'siteHash' => $helper->getSiteHashForDomain($host),
         'storeId' => $storeId,
         'created' => $helper->dateToIso($page->getCreationTime()),
         'changed' => $helper->dateToIso($page->getUpdateTime()),
         'sku' => 'cms/page',
         'productId' => 0,
         'pageId' => $page->getId(),
         'title' => $page->getTitle(),
         'content' => Mage::helper('solr/contentExtractor')->getIndexableContent($page->getContent()),
         'keywords' => $helper->trimExplode(',', $page->getMetaKeywords(), true),
         'url' => Mage::helper('cms/page')->getPageUrl($page->getId()),
     );
     foreach ($fieldValues as $fieldName => $fieldValue) {
         $document->setField($fieldName, $fieldValue);
     }

     return $document;
 }
 /**
  * Processes a physical unit for the Solr index
  *
  * If the unit has a file in the configured fulltext file group, that file
  * is loaded as XML and a Solr document with the unit's metadata and raw
  * fulltext is added to the index. Units without such a file are left
  * untouched and count as success.
  *
  * @access	protected
  *
  * @param	tx_dlf_document		&$doc: The METS document
  * @param	integer		$page: The page number
  * @param	array		$physicalUnit: Array of the physical unit to process
  *
  * @return	integer		0 on success (including "nothing to index") or 1 on failure
  */
 protected static function processPhysical(tx_dlf_document &$doc, $page, array $physicalUnit)
 {
     // Never modified below; every failure path returns 1 directly.
     $errors = 0;
     // Read extension configuration.
     $extConf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
     if (!empty($physicalUnit['files'][$extConf['fileGrpFulltext']])) {
         $file = $doc->getFileLocation($physicalUnit['files'][$extConf['fileGrpFulltext']]);
         // Load XML file.
         // NOTE(review): the version_compare() alternative lets any location through on PHP < 5.3.3 — confirm this is still wanted.
         if (\TYPO3\CMS\Core\Utility\GeneralUtility::isValidUrl($file) || version_compare(phpversion(), '5.3.3', '<')) {
             // Set user-agent to identify self when fetching XML data.
             if (!empty($extConf['useragent'])) {
                 @ini_set('user_agent', $extConf['useragent']);
             }
             // Turn off libxml's error logging.
             $libxmlErrors = libxml_use_internal_errors(TRUE);
             // disable entity loading
             // NOTE(review): libxml_disable_entity_loader() is deprecated as of PHP 8.0 — confirm the targeted PHP versions.
             $previousValueOfEntityLoader = libxml_disable_entity_loader(TRUE);
             // Load XML from file.
             $xml = simplexml_load_string(file_get_contents($file));
             // reset entity loader setting
             libxml_disable_entity_loader($previousValueOfEntityLoader);
             // Reset libxml's error logging.
             libxml_use_internal_errors($libxmlErrors);
             // Parsing failed: report failure.
             if ($xml === FALSE) {
                 return 1;
             }
         } else {
             // File location did not pass the URL check: report failure.
             return 1;
         }
         // Load class.
         if (!class_exists('Apache_Solr_Document')) {
             require_once \TYPO3\CMS\Core\Utility\GeneralUtility::getFileAbsFileName('EXT:' . self::$extKey . '/lib/SolrPhpClient/Apache/Solr/Document.php');
         }
         // Create new Solr document.
         $solrDoc = new Apache_Solr_Document();
         // Create unique identifier from document's UID and unit's XML ID.
         $solrDoc->setField('id', $doc->uid . $physicalUnit['id']);
         $solrDoc->setField('uid', $doc->uid);
         $solrDoc->setField('pid', $doc->pid);
         $solrDoc->setField('page', $page);
         // The thumbnail is optional and only set when the unit has a file in the thumbnail file group.
         if (!empty($physicalUnit['files'][$extConf['fileGrpThumbs']])) {
             $solrDoc->setField('thumbnail', $doc->getFileLocation($physicalUnit['files'][$extConf['fileGrpThumbs']]));
         }
         $solrDoc->setField('partof', $doc->parentId);
         $solrDoc->setField('root', $doc->rootId);
         $solrDoc->setField('sid', $physicalUnit['id']);
         $solrDoc->setField('toplevel', FALSE);
         // The third argument is the boost configured for the "type" field.
         $solrDoc->setField('type', $physicalUnit['type'], self::$fields['fieldboost']['type']);
         $solrDoc->setField('fulltext', tx_dlf_alto::getRawText($xml));
         try {
             self::$solr->service->addDocument($solrDoc);
         } catch (Exception $e) {
             // A flash message is only queued when TYPO3_cliMode is not defined.
             if (!defined('TYPO3_cliMode')) {
                 $message = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\\CMS\\Core\\Messaging\\FlashMessage', tx_dlf_helper::getLL('flash.solrException', TRUE) . '<br />' . htmlspecialchars($e->getMessage()), tx_dlf_helper::getLL('flash.error', TRUE), \TYPO3\CMS\Core\Messaging\FlashMessage::ERROR, TRUE);
                 tx_dlf_helper::addMessage($message);
             }
             return 1;
         }
     }
     return $errors;
 }
Esempio n. 25
0
 /**
  * Indexes the fulltext content of a page in Solr.
  *
  * @param Kwf_Component_Data $page page to index
  * @param bool $debugOutput when true, progress information is echoed
  * @return bool|null null when the page's component class carries the
  *                   'skipFulltext' flag, false when the page has no
  *                   fulltext components or no content, true after the
  *                   document was added and committed
  * @throws Kwf_Exception when the add request does not return HTTP status 200
  */
 public function indexPage(Kwf_Component_Data $page, $debugOutput = false)
 {
     if (Kwc_Abstract::getFlag($page->componentClass, 'skipFulltext')) {
         return;
     }

     $fulltextComponents = $this->getFulltextComponents($page);
     if (!$fulltextComponents) {
         return false;
     }

     if ($debugOutput) {
         echo " *** indexing {$page->componentId} {$page->url}...";
     }
     $contents = $this->getFulltextContentForPage($page, $fulltextComponents);
     unset($fulltextComponents);

     if (!$contents) {
         if ($debugOutput) {
             echo " [no content]\n";
         }
         return false;
     }
     if ($debugOutput) {
         echo " [" . implode(' ', array_keys($contents)) . "]\n";
     }

     $solrDocument = new Apache_Solr_Document();
     foreach ($contents as $fieldName => $fieldContent) {
         // date values are written as formatted GMT strings
         if ($fieldContent instanceof Kwf_DateTime) {
             $fieldContent = gmdate('Y-m-d\\TH:i:s\\Z', $fieldContent->getTimestamp());
         }
         $solrDocument->addField($fieldName, $fieldContent);
     }
     $solrDocument->addField('componentId', $page->componentId);

     $response = $this->_getSolrService($page)->addDocument($solrDocument);
     if ($response->getHttpStatus() != 200) {
         throw new Kwf_Exception("addDocument failed");
     }
     $this->_getSolrService($page)->commit();
     $this->_afterIndex($page);

     return true;
 }
Esempio n. 26
0
/**
 * Stores or updates a reference in rdmp_reference, indexes it in Solr, records a version
 * snapshot, stores its authors and BHL page range, and optionally tweets a new reference.
 *
 * @param object $article  reference object; members read here include reference_id, title,
 *                         secondary_title, genre, date, year, url, volume, issue, spage,
 *                         epage and authors
 * @param int    $PageID   BHL PageID, 0 when there is none
 * @param bool   $updating when true an already known reference is overwritten; when false
 *                         the id of the existing reference is returned without any change
 * @return mixed the reference id, or 0 when a new reference has no (non-empty) title
 */
function db_store_article($article, $PageID = 0, $updating = false)
{
    global $db;
    global $config;
    $update = false;
    $id = 0;
    // If we are editing an existing reference then we already know its id
    if (isset($article->reference_id)) {
        $id = $article->reference_id;
    } else {
        $id = db_find_article($article);
    }
    if ($id != 0) {
        if ($updating) {
            $update = true;
        } else {
            return $id;
        }
    }
    // Try and trap empty references
    if ($id == 0) {
        $ok = false;
        if (isset($article->title)) {
            $ok = $article->title != '';
        }
        if (!$ok) {
            return 0;
        }
    }
    if (!isset($article->genre)) {
        $article->genre = 'article';
    }
    // The author list is optional; use an empty list when it is missing so the indexing
    // code below can iterate and count it safely.
    $article_authors = isset($article->authors) ? $article->authors : array();
    $keys = array();
    $values = array();
    // Article metadata
    foreach ($article as $k => $v) {
        switch ($k) {
            // Ignore as it's an array
            case 'authors':
                break;
            case 'date':
                $keys[] = 'date';
                $values[] = $db->qstr($v);
                // Derive the year from the date when the article has no explicit year
                if (!isset($article->year)) {
                    $keys[] = 'year';
                    $values[] = $db->qstr(year_from_date($v));
                }
                break;
            // Don't store BHL URL here
            case 'url':
                if (preg_match('/^http:\\/\\/(www\\.)?biodiversitylibrary.org\\/page\\/(?<pageid>[0-9]+)/', $v)) {
                } else {
                    // extract Handle if it exists
                    if (preg_match('/^http:\\/\\/hdl.handle.net\\/(?<hdl>.*)$/', $v, $m)) {
                        $keys[] = 'hdl';
                        $values[] = $db->qstr($m['hdl']);
                    } else {
                        $keys[] = $k;
                        $values[] = $db->qstr($v);
                    }
                }
                break;
            // Things we store as is ('date' is handled by its own case above)
            case 'title':
            case 'secondary_title':
            case 'volume':
            case 'series':
            case 'issue':
            case 'spage':
            case 'epage':
            case 'year':
            case 'issn':
            case 'genre':
            case 'doi':
            case 'hdl':
            case 'lsid':
            case 'oclc':
            case 'pdf':
            case 'abstract':
            case 'pmid':
                $keys[] = $k;
                $values[] = $db->qstr($v);
                break;
            // Things we ignore
            default:
                break;
        }
    }
    // Date
    if (!isset($article->date) && isset($article->year)) {
        $keys[] = 'date';
        $values[] = $db->qstr($article->year . '-00-00');
    }
    // BHL PageID
    if ($PageID != 0) {
        $keys[] = 'PageID';
        $values[] = $PageID;
    }
    // SICI
    $s = new Sici();
    $sici = $s->create($article);
    if ($sici != '') {
        $keys[] = 'sici';
        $values[] = $db->qstr($sici);
    }
    if ($update) {
        // Versioning?
        // Delete links	(author, pages, etc)
        // Don't delete page range as we may lose plates, etc. outside range
        /*
        $sql = 'DELETE FROM rdmp_reference_page_joiner WHERE reference_id=' . $id;
        $result = $db->Execute($sql);
        if ($result == false) die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        */
        $sql = 'DELETE FROM rdmp_author_reference_joiner WHERE reference_id = ' . $id;
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
        // update (updated timestamp will be automatically updated)
        $sql = 'UPDATE rdmp_reference SET ';
        $num_values = count($keys);
        for ($i = 0; $i < $num_values; $i++) {
            if ($i > 0) {
                $sql .= ', ';
            }
            $sql .= $keys[$i] . '=' . $values[$i];
        }
        $sql .= ' WHERE reference_id=' . $id;
        /*		$cache_file = @fopen('/tmp/update.sql', "w+") or die("could't open file");
        		@fwrite($cache_file, $sql);
        		fclose($cache_file);
        */
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
    } else {
        // Adding article for first time so add 'created' and 'updated' timestamp
        $keys[] = 'created';
        $values[] = 'NOW()';
        $keys[] = 'updated';
        $values[] = 'NOW()';
        $sql = 'INSERT INTO rdmp_reference (' . implode(",", $keys) . ') VALUES (' . implode(",", $values) . ')';
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
        $id = $db->Insert_ID();
        // Store reference_cluster_id which we can use to group duplicates, by default
        // reference_cluster_id = reference_id
        $sql = 'UPDATE rdmp_reference SET reference_cluster_id=' . $id . ' WHERE reference_id=' . $id;
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
    }
    // Indexing-------------------------------------------------------------------------------------
    // The constant condition selects Solr indexing; the else branch (SQL text index) is
    // currently never executed.
    if (1) {
        // solr
        // this code is redundant with code in reference.php but I use different objects
        // here and there (doh!). Also once we've added old stuff to solr this is the only place we
        // should be calling solr
        $solr = new Apache_Solr_Service('localhost', '8983', '/solr');
        if (!$solr->ping()) {
            // Note: this terminates the script after the database rows were already written
            echo 'Solr service not responding.';
            exit;
        }
        $item = array();
        $item['id'] = 'reference/' . $id;
        $item['title'] = $article->title;
        $item['publication_outlet'] = $article->secondary_title;
        $item['year'] = $article->year;
        // Author objects were read as "surname" here and as "lastname" in the citation loop
        // below; each site keeps its original preference and falls back to the other
        // property so the family name is not silently dropped.
        $authors = array();
        foreach ($article_authors as $a) {
            $family_name = isset($a->surname) ? $a->surname : (isset($a->lastname) ? $a->lastname : '');
            $authors[] = $a->forename . ' ' . $family_name;
        }
        $item['authors'] = $authors;
        // Citation tail: " year title journal volume(issue): spage-epage"
        $citation = '';
        $citation .= ' ' . $article->year;
        $citation .= ' ' . $article->title;
        $citation .= ' ' . $article->secondary_title;
        $citation .= ' ' . $article->volume;
        if (isset($article->issue)) {
            $citation .= '(' . $article->issue . ')';
        }
        $citation .= ':';
        $citation .= ' ';
        $citation .= $article->spage;
        if (isset($article->epage)) {
            $citation .= '-' . $article->epage;
        }
        // Author string: "A, B and C"; with more than three authors only the first two
        // are listed, followed by "et al."
        $text = '';
        $num_authors = count($article_authors);
        $count = 0;
        if ($num_authors > 0) {
            foreach ($article_authors as $author) {
                $family_name = isset($author->lastname) ? $author->lastname : (isset($author->surname) ? $author->surname : '');
                $text .= $author->forename . ' ' . $family_name;
                if (isset($author->suffix)) {
                    $text .= ' ' . $author->suffix;
                }
                $count++;
                if ($count == 2 && $num_authors > 3) {
                    $text .= ' et al.';
                    break;
                }
                if ($count < $num_authors - 1) {
                    $text .= ', ';
                } else {
                    if ($count < $num_authors) {
                        $text .= ' and ';
                    }
                }
            }
        }
        $item['citation'] = $text . ' ' . $citation;
        $parts = array();
        $parts[] = $item;
        //print_r($parts);
        // add to solr
        $documents = array();
        foreach ($parts as $fields) {
            $part = new Apache_Solr_Document();
            foreach ($fields as $key => $value) {
                if (is_array($value)) {
                    foreach ($value as $datum) {
                        $part->setMultiValue($key, $datum);
                    }
                } else {
                    $part->{$key} = $value;
                }
            }
            $documents[] = $part;
        }
        //
        //
        // Load the documents into the index
        //
        try {
            $solr->addDocuments($documents);
            $solr->commit();
            $solr->optimize();
        } catch (Exception $e) {
            // Indexing failures are reported but do not abort storing the reference
            echo $e->getMessage();
        }
    } else {
        $sql = 'DELETE FROM rdmp_text_index WHERE (object_uri=' . $db->qstr($config['web_root'] . 'reference/' . $id) . ')';
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
        // Only do this if we have a title, as sometimes we don't (e.g. CrossRef lacks metadata)
        if (isset($article->title)) {
            $sql = 'INSERT INTO rdmp_text_index(object_type, object_id, object_uri, object_text)
			VALUES ("title"' . ', ' . $id . ', ' . $db->qstr($config['web_root'] . 'reference/' . $id) . ', ' . $db->qstr($article->title) . ')';
            $result = $db->Execute($sql);
            if ($result == false) {
                die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
            }
        }
    }
    // Versioning-----------------------------------------------------------------------------------
    // Store this object in version table so we can recover it if we overwrite item
    $ip = getip();
    // The address is quoted with qstr() like every other string value in this function
    // instead of being concatenated between hand-written quotes.
    $sql = 'INSERT INTO rdmp_reference_version(reference_id, ip, json) VALUES(' . $id . ', ' . 'INET_ATON(' . $db->qstr($ip) . ')' . ',' . $db->qstr(json_encode($article)) . ')';
    $result = $db->Execute($sql);
    if ($result == false) {
        die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
    }
    // Author(s)------------------------------------------------------------------------------------
    // Store authors and link them to the article
    if (isset($article->authors)) {
        db_store_authors($id, $article->authors);
    }
    // Store page range (only if not updating, otherwise we may lose plates, etc.
    // that aren't in page range)
    if ($PageID != 0 && !$update) {
        $page_range = array();
        if (isset($article->spage) && isset($article->epage)) {
            $page_range = bhl_page_range($PageID, $article->epage - $article->spage + 1);
        } else {
            // No epage, so just get spage (to do: how do we tell user we don't have page range?)
            $page_range = bhl_page_range($PageID, 0);
        }
        //print_r($page_range);
        $count = 0;
        foreach ($page_range as $page) {
            $sql = 'INSERT INTO rdmp_reference_page_joiner (reference_id, PageID, page_order) 
			VALUES (' . $id . ',' . $page . ',' . $count++ . ')';
            $result = $db->Execute($sql);
            if ($result == false) {
                die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
            }
        }
    }
    // Tweet----------------------------------------------------------------------------------------
    if (!$update) {
        if ($config['twitter']) {
            $url = $config['web_root'] . 'reference/' . $id . ' ' . '#bhlib';
            // url + hashtag
            $url_len = strlen($url);
            $status = '';
            if (isset($article->title)) {
                // NOTE(review): strlen()/substr() count bytes, so a title with multi-byte
                // characters may be cut in the middle of a character - confirm whether
                // character-based truncation is wanted here.
                $status = $article->title;
                $status_len = strlen($status);
                $extra = 140 - $status_len - $url_len - 1;
                if ($extra < 0) {
                    // Shorten the title so that title + ellipsis + space + url fits in 140
                    $status_len += $extra;
                    $status_len -= 1;
                    $status = substr($status, 0, $status_len);
                    $status .= '…';
                }
            }
            $status .= ' ' . $url;
            tweet($status);
        }
    }
    return $id;
}
Esempio n. 27
0
<?php

// Unit test: fields set on an Apache_Solr_Document are reported back by getField(),
// both for a multi-value field and for a single value with a boost.
require dirname(__FILE__) . '/../../bootstrap/unit.php';

$t = new limeade_test(2, limeade_output::get());
$t->diag('document format a document');

$doc = new Apache_Solr_Document();
$doc->setBoost(10);
$doc->setField('sfl_guid', 'GUID_1234');
$doc->setField('name', 'Thomas Rabaix', 1);
// "skills" receives values through setMultiValue() and addField()
$doc->setMultiValue('skills', 'php');
$doc->setMultiValue('skills', 'symfony');
$doc->addField('skills', 'objective-c');

$expectedSkills = array(
    'name' => 'skills',
    'value' => array('php', 'symfony', 'objective-c'),
    'boost' => false,
);
$t->cmp_ok($doc->getField('skills'), '==', $expectedSkills, '::getField test multivalue setter');

$expectedName = array(
    'name' => 'name',
    'value' => 'Thomas Rabaix',
    'boost' => 1,
);
$t->cmp_ok($doc->getField('name'), '==', $expectedName, '::getField test setter');
Esempio n. 28
0
 /**
  * Sends one resource, together with its related user, comments, annotations, keywords,
  * collections and metadata, to Solr as a single document, then commits and optimizes.
  *
  * @param array $resource record with 'Resource', 'User', 'Comment', 'Annotation',
  *                        'Keyword', 'Collection' and 'Metadatum' entries
  * @return false|null false when $resource is not an array; no value otherwise
  */
 public function addResource($resource)
 {
     if (!is_array($resource)) {
         return false;
     }

     // Solr field name => scalar value, or list of values for multi-valued fields
     $fields = array(
         'id' => $resource['Resource']['id'],
         'sha' => $resource['Resource']['sha'],
         'user' => $resource['User']['name'],
         'filetype' => $resource['Resource']['mime_type'],
         'filename' => $resource['Resource']['file_name'],
         'type' => $resource['Resource']['type'],
         'title' => $resource['Resource']['title'],
         'public' => $resource['Resource']['public'],
         'modified' => $this->_formatDate($resource['Resource']['modified']),
         'created' => $this->_formatDate($resource['Resource']['created']),
         'comment' => \_\pluck($resource['Comment'], 'content'),
         'annotation' => \_\pluck($resource['Annotation'], 'caption'),
         'keyword' => \_\pluck($resource['Keyword'], 'keyword'),
         'collection' => $resource['Collection'] ?: array(),
     );

     $document = new \Apache_Solr_Document();
     foreach ($fields as $name => $content) {
         if (!is_array($content)) {
             $document->{$name} = $content;
             continue;
         }
         foreach ($content as $entry) {
             $document->addField($name, $entry);
         }
     }

     // every metadata attribute becomes its own "<attribute>_t" field
     foreach ($resource['Metadatum'] as $metadatum) {
         $document->addField($metadatum['attribute'] . '_t', $metadatum['value']);
     }

     $this->solr->addDocument($document);
     $this->solr->commit();
     $this->solr->optimize();
 }
Esempio n. 29
0
 /**
  * Builds the Apache_Solr_Document for one record from the fields configured on an addon.
  *
  * The document id is "<addon table>_<record id>". Every value of every configured field
  * is added under its Solr name; values of the addon's title field are added as 'title'
  * as well. Tags and the result type are added when the addon asks for them.
  *
  * @param Omeka_Record $record The record to index.
  * @param object $addon The addon configuration controlling how the record is indexed
  * (the earlier documentation names the type SolrSearch_Addon_Addon).
  *
  * @return Apache_Solr_Document
  * @author Eric Rochester <*****@*****.**>
  **/
 public function indexRecord($record, $addon)
 {
     $solrDoc = new Apache_Solr_Document();
     $solrDoc->id = "{$addon->table}_{$record->id}";
     $solrDoc->addField('model', $addon->table);
     $solrDoc->addField('modelid', $record->id);

     $title = $addon->getTitleField();
     foreach ($addon->fields as $field) {
         $solrName = $this->makeSolrName($addon, $field->name);
         // fields without a remote definition are read from the record itself
         $values = is_null($field->remote)
             ? $this->getLocalValue($record, $field)
             : $this->getRemoteValue($record, $field);
         foreach ($values as $single) {
             $solrDoc->addField($solrName, $single);
             if ($title === null || $title->name !== $field->name) {
                 continue;
             }
             $solrDoc->addField('title', $single);
         }
     }

     if ($addon->tagged) {
         foreach ($record->getTags() as $tag) {
             $solrDoc->addField('tag', $tag->name);
         }
     }

     if ($addon->resultType) {
         $solrDoc->addField('resulttype', $addon->resultType);
     }

     return $solrDoc;
 }
Esempio n. 30
0
 /**
  * Takes a search result document and processes its fields according to the
  * instructions configured in TS. Currently available instructions are
  *    * timestamp - converts a date field into a unix timestamp
  *    * serialize - uses serialize() to encode multivalue fields which then can be put out using the MULTIVALUE view helper
  *    * skip - skips the whole field so that it is not available in the result, useful for the spell field f.e.
  * The default is to do nothing and just add the document's field to the
  * resulting array. Every value that is kept is passed through
  * Template::escapeMarkers() before it is stored.
  *
  * @param \Apache_Solr_Document $document the Apache_Solr_Document result document
  * @return array An array with field values processed like defined in TS, keyed by field name
  */
 protected function processDocumentFieldsToArray(\Apache_Solr_Document $document)
 {
     $processingInstructions = $this->configuration->getSearchResultsFieldProcessingInstructionsConfiguration();
     $availableFields = $document->getFieldNames();
     $result = array();
     foreach ($availableFields as $fieldName) {
         // Fields without a configured instruction are the normal case; look the
         // instruction up defensively so they do not raise an undefined index notice
         // and simply fall through to the default branch below.
         $processingInstruction = isset($processingInstructions[$fieldName])
             ? $processingInstructions[$fieldName]
             : null;
         // TODO switch to field processors
         // TODO allow to have multiple (comma-separated) instructions for each field
         switch ($processingInstruction) {
             case 'timestamp':
                 $processedFieldValue = Util::isoToTimestamp($document->{$fieldName});
                 break;
             case 'serialize':
                 if (!empty($document->{$fieldName})) {
                     $processedFieldValue = serialize($document->{$fieldName});
                 } else {
                     $processedFieldValue = '';
                 }
                 break;
             case 'skip':
                 // leave the field out of the result and go on with the next field
                 continue 2;
             default:
                 $processedFieldValue = $document->{$fieldName};
         }
         // escape markers in document fields
         // TODO remove after switching to fluid templates
         $processedFieldValue = Template::escapeMarkers($processedFieldValue);
         $result[$fieldName] = $processedFieldValue;
     }
     return $result;
 }