/**
 * Serialize the record fields into an ISO 2709 string.
 *
 * The result consists of a 24-character leader, a directory with one
 * 12-character entry per field occurrence (tag, 4-digit length, 5-digit
 * offset) and the concatenated field data. An empty string is returned if a
 * single field is longer than 9999 bytes, a field offset exceeds 99999 or the
 * whole record is longer than 99999 bytes, since those values cannot be
 * represented in the fixed-width directory and leader positions.
 *
 * @return string
 */
protected function toISO2709()
{
    $baseLeader = str_pad(substr($this->fields['000'], 0, 24), 24);
    $dirEntries = '';
    $payload = '';
    $offset = 0;

    foreach ($this->fields as $tag => $occurrences) {
        // The leader is stored under tag 000 and is emitted separately below
        if ($tag == '000') {
            continue;
        }
        if (strlen($tag) != 3) {
            error_log(
                "Invalid field tag: '{$tag}', id " . $this->getField('001')
            );
            continue;
        }

        foreach ($occurrences as $occurrence) {
            if (!is_array($occurrence)) {
                // Additional normalization here so that we don't break ISO2709
                // directory in SolrUpdater
                $serialized = MetadataUtils::normalizeUnicode($occurrence);
            } else {
                // Data field: two indicators followed by the subfields
                $serialized = $occurrence['i1'] . $occurrence['i2'];
                $hasSubfields = isset($occurrence['s'])
                    && is_array($occurrence['s']);
                if ($hasSubfields) {
                    foreach ($occurrence['s'] as $subfield) {
                        // Each subfield is a single-element array: code => value
                        $code = key($subfield);
                        $serialized .= MARCRecord::SUBFIELD_INDICATOR
                            . $code
                            . current($subfield);
                    }
                }
            }
            $serialized .= MARCRecord::END_OF_FIELD;

            // Lengths are measured in bytes, as required by the directory
            $byteLen = strlen($serialized);
            if ($byteLen > 9999 || $offset > 99999) {
                return '';
            }

            $dirEntries .= $tag
                . sprintf('%04d', $byteLen)
                . sprintf('%05d', $offset);
            $payload .= $serialized;
            $offset += $byteLen;
        }
    }

    $dirEntries .= MARCRecord::END_OF_FIELD;
    $payload .= MARCRecord::END_OF_RECORD;

    $baseAddress = strlen($baseLeader) + strlen($dirEntries);
    $totalLen = $baseAddress + strlen($payload);
    if ($totalLen > 99999) {
        return '';
    }

    // Patch record length (positions 0-4) and base address of data
    // (positions 12-16) into the leader, keeping the other positions intact
    $finalLeader = sprintf('%05d', $totalLen)
        . substr($baseLeader, 5, 7)
        . sprintf('%05d', $baseAddress)
        . substr($baseLeader, 17);

    return $finalLeader . $dirEntries . $payload;
}
/**
 * Create Solr array for the given record
 *
 * Builds a metadata record from the stored record, optionally merges its
 * component parts, converts it to a Solr field array and then post-processes
 * the array: host/component links, extra fields, mapping files, building
 * prefixes, hierarchical facets, allfields, timestamps and a final
 * normalization pass that drops empty values.
 *
 * @param array   $record           Mongo record
 * @param integer $mergedComponents Number of component parts merged to the
 * record (passed by reference and incremented by the number merged here)
 *
 * @return array|false Solr fields keyed by field name, or false if the record
 * is a hidden component part and merged parts are not to be indexed
 * @throws Exception If no settings exist for the record's data source
 */
protected function createSolrArray($record, &$mergedComponents)
{
    global $configArray;

    $metadataRecord = RecordFactory::createRecord(
        $record['format'],
        MetadataUtils::getRecordData($record, true),
        $record['oai_id'],
        $record['source_id']
    );

    $source = $record['source_id'];
    if (!isset($this->settings[$source])) {
        // Try to reload data source settings as they might have been updated
        // during a long run
        $this->loadDatasources();
        if (!isset($this->settings[$source])) {
            $this->log->log(
                'createSolrArray',
                "No settings found for data source '{$source}', record "
                . "{$record['_id']}: " . $this->prettyPrint($record, true),
                Logger::FATAL
            );
            throw new Exception("No settings found for data source '{$source}'");
        }
    }
    $settings = $this->settings[$source];

    // Decide whether this component part is hidden (merged into its host)
    $hiddenComponent = false;
    if (isset($record['host_record_id'])) {
        if ($settings['componentParts'] == 'merge_all') {
            $hiddenComponent = true;
        } elseif ($settings['componentParts'] == 'merge_non_articles'
            || $settings['componentParts'] == 'merge_non_earticles'
        ) {
            $format = $metadataRecord->getFormat();
            if (!in_array($format, $this->allArticleFormats)) {
                $hiddenComponent = true;
            } elseif (in_array($format, $this->articleFormats)) {
                $hiddenComponent = true;
            }
        }
    }

    // empty() gives the same truthiness as "!" without a notice on a missing key
    if ($hiddenComponent && empty($settings['indexMergedParts'])) {
        return false;
    }

    $hasComponentParts = false;
    $components = null;
    if (!isset($record['host_record_id'])) {
        // Fetch info whether component parts exist and need to be merged
        if (empty($record['linking_id'])) {
            $this->log->log(
                'createSolrArray',
                "linking_id missing for record '{$record['_id']}'",
                Logger::ERROR
            );
        } else {
            $components = $this->db->record->find(
                [
                    'source_id' => $record['source_id'],
                    'host_record_id' => $record['linking_id'],
                    'deleted' => false
                ]
            )->timeout($this->cursorTimeout);
            $hasComponentParts = $components->hasNext();

            $format = $metadataRecord->getFormat();
            $merge = false;
            if ($settings['componentParts'] == 'merge_all') {
                $merge = true;
            } elseif (!in_array($format, $this->allJournalFormats)) {
                $merge = true;
            } elseif (in_array($format, $this->journalFormats)
                && $settings['componentParts'] == 'merge_non_earticles'
            ) {
                $merge = true;
            }
            if (!$merge) {
                // Component parts exist but are not merged into this record
                $components = null;
            }
        }
    }

    if (isset($components)) {
        $mergedComponents += $metadataRecord->mergeComponentParts($components);
    }

    if (isset($settings['solrTransformationXSLT'])) {
        $params = [
            'source_id' => $source,
            'institution' => $settings['institution'],
            'format' => $settings['format'],
            'id_prefix' => $settings['idPrefix']
        ];
        $data = $settings['solrTransformationXSLT']->transformToSolrArray(
            $metadataRecord->toXML(),
            $params
        );
    } else {
        $prependTitleWithSubtitle
            = isset($settings['prepend_title_with_subtitle'])
            ? $settings['prepend_title_with_subtitle']
            : true;
        $data = $metadataRecord->toSolrArray($prependTitleWithSubtitle);
        $this->enrich($source, $settings, $metadataRecord, $data);
    }

    $data['id'] = $record['_id'];

    // Record links between host records and component parts
    if ($metadataRecord->getIsComponentPart()) {
        $hostRecord = null;
        if (isset($record['host_record_id']) && $this->db) {
            $hostRecord = $this->db->record->find(
                [
                    'source_id' => $record['source_id'],
                    'linking_id' => $record['host_record_id']
                ]
            )->limit(-1)->timeout($this->cursorTimeout)->getNext();
        }
        if (!$hostRecord) {
            if (isset($record['host_record_id'])) {
                $this->log->log(
                    'createSolrArray',
                    "Host record '" . $record['host_record_id']
                    . "' not found for record '" . $record['_id'] . "'",
                    Logger::WARNING
                );
            }
            $data['container_title'] = $metadataRecord->getContainerTitle();
        } else {
            $data['hierarchy_parent_id'] = $hostRecord['_id'];
            $hostMetadataRecord = RecordFactory::createRecord(
                $hostRecord['format'],
                MetadataUtils::getRecordData($hostRecord, true),
                $hostRecord['oai_id'],
                $hostRecord['source_id']
            );
            $data['container_title'] = $data['hierarchy_parent_title']
                = $hostMetadataRecord->getTitle();
        }
        $data['container_volume'] = $metadataRecord->getVolume();
        $data['container_issue'] = $metadataRecord->getIssue();
        $data['container_start_page'] = $metadataRecord->getStartPage();
        $data['container_reference'] = $metadataRecord->getContainerReference();
    } else {
        // Add prefixes to hierarchy linking fields
        $linkFields = ['hierarchy_top_id', 'hierarchy_parent_id', 'is_hierarchy_id'];
        foreach ($linkFields as $field) {
            if (isset($data[$field]) && $data[$field]) {
                $data[$field] = $record['source_id'] . '.' . $data[$field];
            }
        }
    }

    if ($hasComponentParts) {
        $data['is_hierarchy_id'] = $record['_id'];
        $data['is_hierarchy_title'] = $metadataRecord->getTitle();
    }

    if (!isset($data['institution'])) {
        $data['institution'] = $settings['institution'];
    }

    // Append configured extra fields; each entry is a single name => value pair
    foreach ($settings['extraFields'] as $extraField) {
        $fieldName = key($extraField);
        $fieldValue = current($extraField);
        if (isset($data[$fieldName])) {
            if (!is_array($data[$fieldName])) {
                $data[$fieldName] = [$data[$fieldName]];
            }
            $data[$fieldName][] = $fieldValue;
        } else {
            $data[$fieldName] = $fieldValue;
        }
    }

    // Map field values according to any mapping files
    foreach ($settings['mappingFiles'] as $field => $map) {
        if (isset($data[$field]) && !empty($data[$field])) {
            if (is_array($data[$field])) {
                // Map every value separately and collect the results. The
                // previous implementation overwrote the result on each
                // iteration, so only the last mappable value survived.
                $newValues = [];
                foreach ($data[$field] as $value) {
                    if (isset($map[$value])) {
                        $mapped = $map[$value];
                    } elseif (isset($map['##default'])) {
                        $mapped = $map['##default'];
                    } else {
                        // No mapping: keep the value, like the scalar branch
                        $mapped = $value;
                    }
                    if (is_array($mapped)) {
                        $newValues = array_merge(
                            $newValues,
                            array_values($mapped)
                        );
                    } else {
                        $newValues[] = $mapped;
                    }
                }
                $data[$field] = array_values(array_unique($newValues));
            } else {
                if (isset($map[$data[$field]])) {
                    $data[$field] = $map[$data[$field]];
                } elseif (isset($map['##default'])) {
                    $data[$field] = $map['##default'];
                }
            }
        } elseif (isset($map['##empty'])) {
            $data[$field] = $map['##empty'];
        } elseif (isset($map['##emptyarray'])) {
            $data[$field] = [$map['##emptyarray']];
        }
    }

    // Special case: Special values for building (institution/location).
    // Used by default if building is set as a hierarchical facet.
    if ($this->buildingHierarchy || isset($settings['institutionInBuilding'])) {
        $useInstitution = isset($settings['institutionInBuilding'])
            ? $settings['institutionInBuilding']
            : 'institution';
        switch ($useInstitution) {
        case 'driver':
            $institutionCode = $data['institution'];
            break;
        case 'none':
            $institutionCode = '';
            break;
        case 'source':
            $institutionCode = $source;
            break;
        case 'institution/source':
            $institutionCode = $settings['institution'] . '/' . $source;
            break;
        default:
            $institutionCode = $settings['institution'];
            break;
        }
        if ($institutionCode) {
            if (isset($data['building']) && $data['building']) {
                if (is_array($data['building'])) {
                    foreach ($data['building'] as &$building) {
                        // Allow also empty values that might result from
                        // mapping tables
                        if ($building !== '') {
                            $building = "{$institutionCode}/{$building}";
                        }
                    }
                    // Break the reference so later code cannot accidentally
                    // modify the last building entry
                    unset($building);
                } else {
                    $data['building'] = $institutionCode . '/' . $data['building'];
                }
            } else {
                $data['building'] = [$institutionCode];
            }
        }
    }

    // Hierarchical facets: expand "a/b" into "0/a/" and "1/a/b/"
    if (isset($configArray['Solr']['hierarchical_facets'])) {
        foreach ($configArray['Solr']['hierarchical_facets'] as $facet) {
            if (!isset($data[$facet])) {
                continue;
            }
            $array = [];
            if (!is_array($data[$facet])) {
                $data[$facet] = [$data[$facet]];
            }
            foreach ($data[$facet] as $datavalue) {
                if ($datavalue === '') {
                    continue;
                }
                $hierarchyString = '';
                foreach (explode('/', $datavalue) as $level => $part) {
                    $hierarchyString .= '/' . $part;
                    $array[] = $level . $hierarchyString . '/';
                }
            }
            $data[$facet] = $array;
        }
    }

    if (!isset($data['allfields'])) {
        $all = [];
        $excluded = ['fullrecord', 'thumbnail', 'id', 'recordtype', 'ctrlnum'];
        foreach ($data as $key => $field) {
            if (in_array($key, $excluded)) {
                continue;
            }
            if (is_array($field)) {
                $all = array_merge($all, $field);
            } else {
                $all[] = $field;
            }
        }
        $data['allfields'] = MetadataUtils::array_iunique($all);
    }

    $data['first_indexed']
        = MetadataUtils::formatTimestamp($record['created']->sec);
    $data['last_indexed'] = MetadataUtils::formatTimestamp($record['date']->sec);
    $data['recordtype'] = $record['format'];
    if (!isset($data['fullrecord'])) {
        $data['fullrecord'] = $metadataRecord->toXML();
    }
    if (!is_array($data['format'])) {
        $data['format'] = [$data['format']];
    }

    if (isset($configArray['Solr']['format_in_allfields'])
        && $configArray['Solr']['format_in_allfields']
    ) {
        foreach ($data['format'] as $format) {
            // Replace numbers since they may be considered word boundaries
            $data['allfields'][] = str_replace(
                ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'],
                ['ax', 'bx', 'cx', 'dx', 'ex', 'fx', 'gx', 'hx', 'ix', 'jx'],
                MetadataUtils::normalize($format)
            );
        }
    }

    if ($hiddenComponent) {
        $data['hidden_component_boolean'] = true;
    }

    // Final pass: normalize all values and drop empty ones. foreach iterates
    // over a copy of $data, so assigning to and unsetting from $data inside
    // the loop is safe. The previous implementation reused $key in the inner
    // loop, so a multi-valued field that became empty was never removed.
    // empty() already covers 0, 0.0, '0', '' and null.
    foreach ($data as $fieldKey => $fieldValues) {
        if (is_array($fieldValues)) {
            $kept = [];
            foreach ($fieldValues as $item) {
                $item = MetadataUtils::normalizeUnicode($item);
                if (!empty($item)) {
                    $kept[] = $item;
                }
            }
            $fieldValues = array_values(array_unique($kept));
        } elseif ($fieldKey !== 'fullrecord') {
            // fullrecord is left untouched so the stored record is not altered
            $fieldValues = MetadataUtils::normalizeUnicode($fieldValues);
        }

        if (empty($fieldValues)) {
            unset($data[$fieldKey]);
        } else {
            $data[$fieldKey] = $fieldValues;
        }
    }

    return $data;
}