Ejemplo n.º 1
0
 /**
  * Create Solr array for the given record
  *
  * Builds a metadata record object from the Mongo record, merges component
  * parts into it when the data source settings call for that, converts it to
  * a field => value array and then post-processes the array: host/component
  * links, extra fields, mapping files, building prefixes, hierarchical
  * facets, allfields, timestamps and a final normalization pass that drops
  * empty values.
  *
  * @param array   $record           Mongo record
  * @param integer $mergedComponents Number of component parts merged to the
  * record (incremented in place)
  *
  * @return array|false Solr field array, or false when the record is a
  * component part that gets merged into its host record and the data source
  * setting 'indexMergedParts' is not enabled
  * @throws Exception If no settings are found for the record's data source
  */
 protected function createSolrArray($record, &$mergedComponents)
 {
     global $configArray;
     $metadataRecord = RecordFactory::createRecord($record['format'], MetadataUtils::getRecordData($record, true), $record['oai_id'], $record['source_id']);
     $source = $record['source_id'];
     if (!isset($this->settings[$source])) {
         // Try to reload data source settings as they might have been updated
         // during a long run
         $this->loadDatasources();
         if (!isset($this->settings[$source])) {
             $this->log->log('createSolrArray', "No settings found for data source '{$source}', record " . "{$record['_id']}: " . $this->prettyPrint($record, true), Logger::FATAL);
             throw new Exception("No settings found for data source '{$source}'");
         }
     }
     $settings = $this->settings[$source];

     // Decide whether this record is a component part that is merged into its
     // host record and therefore hidden.
     $hiddenComponent = false;
     if (isset($record['host_record_id'])) {
         if ($settings['componentParts'] == 'merge_all') {
             $hiddenComponent = true;
         } elseif ($settings['componentParts'] == 'merge_non_articles' || $settings['componentParts'] == 'merge_non_earticles') {
             // NOTE(review): both settings are handled identically here, while
             // the host record branch below distinguishes
             // 'merge_non_earticles' -- confirm that this is intended.
             $format = $metadataRecord->getFormat();
             if (!in_array($format, $this->allArticleFormats)) {
                 $hiddenComponent = true;
             } elseif (in_array($format, $this->articleFormats)) {
                 $hiddenComponent = true;
             }
         }
     }
     if ($hiddenComponent && !$settings['indexMergedParts']) {
         return false;
     }

     $hasComponentParts = false;
     $components = null;
     if (!isset($record['host_record_id'])) {
         // Fetch info whether component parts exist and need to be merged
         if (empty($record['linking_id'])) {
             // empty() also covers a missing key without raising a notice
             $this->log->log('createSolrArray', "linking_id missing for record '{$record['_id']}'", Logger::ERROR);
         } else {
             $components = $this->db->record->find(['source_id' => $record['source_id'], 'host_record_id' => $record['linking_id'], 'deleted' => false])->timeout($this->cursorTimeout);
             $hasComponentParts = $components->hasNext();
             $format = $metadataRecord->getFormat();
             $merge = false;
             if ($settings['componentParts'] == 'merge_all') {
                 $merge = true;
             } elseif (!in_array($format, $this->allJournalFormats)) {
                 $merge = true;
             } elseif (in_array($format, $this->journalFormats) && $settings['componentParts'] == 'merge_non_earticles') {
                 $merge = true;
             }
             if (!$merge) {
                 // Component parts exist but must not be merged into this record
                 unset($components);
             }
         }
     }
     if (isset($components)) {
         $mergedComponents += $metadataRecord->mergeComponentParts($components);
     }

     // Convert the metadata record to a Solr array, either with the
     // configured XSLT transformation or with the record driver itself
     if (isset($settings['solrTransformationXSLT'])) {
         $params = ['source_id' => $source, 'institution' => $settings['institution'], 'format' => $settings['format'], 'id_prefix' => $settings['idPrefix']];
         $data = $settings['solrTransformationXSLT']->transformToSolrArray($metadataRecord->toXML(), $params);
     } else {
         $prependTitleWithSubtitle = isset($settings['prepend_title_with_subtitle']) ? $settings['prepend_title_with_subtitle'] : true;
         $data = $metadataRecord->toSolrArray($prependTitleWithSubtitle);
         $this->enrich($source, $settings, $metadataRecord, $data);
     }
     $data['id'] = $record['_id'];

     // Record links between host records and component parts
     if ($metadataRecord->getIsComponentPart()) {
         $hostRecord = null;
         if (isset($record['host_record_id']) && $this->db) {
             $hostRecord = $this->db->record->find(['source_id' => $record['source_id'], 'linking_id' => $record['host_record_id']])->limit(-1)->timeout($this->cursorTimeout)->getNext();
         }
         if (!$hostRecord) {
             if (isset($record['host_record_id'])) {
                 $this->log->log('createSolrArray', "Host record '" . $record['host_record_id'] . "' not found for record '" . $record['_id'] . "'", Logger::WARNING);
             }
             // No host record available, fall back to the container title
             // carried by the component part itself
             $data['container_title'] = $metadataRecord->getContainerTitle();
         } else {
             $data['hierarchy_parent_id'] = $hostRecord['_id'];
             $hostMetadataRecord = RecordFactory::createRecord($hostRecord['format'], MetadataUtils::getRecordData($hostRecord, true), $hostRecord['oai_id'], $hostRecord['source_id']);
             $data['container_title'] = $data['hierarchy_parent_title'] = $hostMetadataRecord->getTitle();
         }
         $data['container_volume'] = $metadataRecord->getVolume();
         $data['container_issue'] = $metadataRecord->getIssue();
         $data['container_start_page'] = $metadataRecord->getStartPage();
         $data['container_reference'] = $metadataRecord->getContainerReference();
     } else {
         // Add prefixes to hierarchy linking fields
         foreach (['hierarchy_top_id', 'hierarchy_parent_id', 'is_hierarchy_id'] as $field) {
             if (isset($data[$field]) && $data[$field]) {
                 $data[$field] = $record['source_id'] . '.' . $data[$field];
             }
         }
     }
     if ($hasComponentParts) {
         $data['is_hierarchy_id'] = $record['_id'];
         $data['is_hierarchy_title'] = $metadataRecord->getTitle();
     }
     if (!isset($data['institution'])) {
         $data['institution'] = $settings['institution'];
     }

     // Append extra fields; each entry is read as a single name => value pair
     foreach ($settings['extraFields'] as $extraField) {
         $fieldName = key($extraField);
         $fieldValue = current($extraField);
         if (isset($data[$fieldName])) {
             if (!is_array($data[$fieldName])) {
                 $data[$fieldName] = [$data[$fieldName]];
             }
             $data[$fieldName][] = $fieldValue;
         } else {
             $data[$fieldName] = $fieldValue;
         }
     }

     // Map field values according to any mapping files
     foreach ($settings['mappingFiles'] as $field => $map) {
         if (isset($data[$field]) && !empty($data[$field])) {
             if (is_array($data[$field])) {
                 // Map every value on its own so that a multi-valued field
                 // keeps all of its values. A value without a mapping and
                 // without a '##default' entry is kept as is, which matches
                 // the handling of single values below.
                 $mappedValues = [];
                 foreach ($data[$field] as $value) {
                     if (isset($map[$value])) {
                         $mappedValue = $map[$value];
                     } elseif (isset($map['##default'])) {
                         $mappedValue = $map['##default'];
                     } else {
                         $mappedValue = $value;
                     }
                     if (is_array($mappedValue)) {
                         $mappedValues = array_merge($mappedValues, array_values($mappedValue));
                     } else {
                         $mappedValues[] = $mappedValue;
                     }
                 }
                 $data[$field] = array_values(array_unique($mappedValues));
             } else {
                 if (isset($map[$data[$field]])) {
                     $data[$field] = $map[$data[$field]];
                 } elseif (isset($map['##default'])) {
                     $data[$field] = $map['##default'];
                 }
             }
         } elseif (isset($map['##empty'])) {
             $data[$field] = $map['##empty'];
         } elseif (isset($map['##emptyarray'])) {
             $data[$field] = [$map['##emptyarray']];
         }
     }

     // Special case: Special values for building (institution/location).
     // Used by default if building is set as a hierarchical facet.
     if ($this->buildingHierarchy || isset($settings['institutionInBuilding'])) {
         $useInstitution = isset($settings['institutionInBuilding']) ? $settings['institutionInBuilding'] : 'institution';
         switch ($useInstitution) {
             case 'driver':
                 $institutionCode = $data['institution'];
                 break;
             case 'none':
                 $institutionCode = '';
                 break;
             case 'source':
                 $institutionCode = $source;
                 break;
             case 'institution/source':
                 $institutionCode = $settings['institution'] . '/' . $source;
                 break;
             default:
                 $institutionCode = $settings['institution'];
                 break;
         }
         if ($institutionCode) {
             if (isset($data['building']) && $data['building']) {
                 if (is_array($data['building'])) {
                     foreach ($data['building'] as &$building) {
                         // Allow also empty values that might result from
                         // mapping tables
                         if ($building !== '') {
                             $building = "{$institutionCode}/{$building}";
                         }
                     }
                     // Break the reference so that later writes cannot
                     // modify the last building by accident
                     unset($building);
                 } else {
                     $data['building'] = $institutionCode . '/' . $data['building'];
                 }
             } else {
                 $data['building'] = [$institutionCode];
             }
         }
     }

     // Hierarchical facets: expand 'a/b' to '0/a/' and '1/a/b/'
     if (isset($configArray['Solr']['hierarchical_facets'])) {
         foreach ($configArray['Solr']['hierarchical_facets'] as $facet) {
             if (!isset($data[$facet])) {
                 continue;
             }
             $array = [];
             if (!is_array($data[$facet])) {
                 $data[$facet] = [$data[$facet]];
             }
             foreach ($data[$facet] as $datavalue) {
                 if ($datavalue === '') {
                     continue;
                 }
                 $hierarchyString = '';
                 foreach (explode('/', $datavalue) as $level => $part) {
                     $hierarchyString .= '/' . $part;
                     $array[] = $level . $hierarchyString . '/';
                 }
             }
             $data[$facet] = $array;
         }
     }

     // Build allfields from all other fields unless it has been provided
     if (!isset($data['allfields'])) {
         $all = [];
         foreach ($data as $key => $field) {
             if (in_array($key, ['fullrecord', 'thumbnail', 'id', 'recordtype', 'ctrlnum'])) {
                 continue;
             }
             if (is_array($field)) {
                 $all = array_merge($all, $field);
             } else {
                 $all[] = $field;
             }
         }
         $data['allfields'] = MetadataUtils::array_iunique($all);
     }
     $data['first_indexed'] = MetadataUtils::formatTimestamp($record['created']->sec);
     $data['last_indexed'] = MetadataUtils::formatTimestamp($record['date']->sec);
     $data['recordtype'] = $record['format'];
     if (!isset($data['fullrecord'])) {
         $data['fullrecord'] = $metadataRecord->toXML();
     }
     if (!is_array($data['format'])) {
         $data['format'] = [$data['format']];
     }
     if (isset($configArray['Solr']['format_in_allfields']) && $configArray['Solr']['format_in_allfields']) {
         foreach ($data['format'] as $format) {
             // Replace numbers since they may be considered word boundaries
             $data['allfields'][] = str_replace(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'], ['ax', 'bx', 'cx', 'dx', 'ex', 'fx', 'gx', 'hx', 'ix', 'jx'], MetadataUtils::normalize($format));
         }
     }
     if ($hiddenComponent) {
         $data['hidden_component_boolean'] = true;
     }

     // Normalize all values and drop empty ones. empty() is true also for 0,
     // 0.0 and '0', so those values are removed as well.
     foreach ($data as $key => &$values) {
         if (is_array($values)) {
             // The inner key needs a name of its own: reusing $key would make
             // the unset() below target a numeric index instead of the field
             foreach ($values as $valueKey => &$value) {
                 $value = MetadataUtils::normalizeUnicode($value);
                 if (empty($value)) {
                     unset($values[$valueKey]);
                 }
             }
             unset($value);
             $values = array_values(array_unique($values));
         } elseif ($key !== 'fullrecord') {
             $values = MetadataUtils::normalizeUnicode($values);
         }
         if (empty($values)) {
             unset($data[$key]);
         }
     }
     unset($values);

     return $data;
 }