コード例 #1
0
 /**
  * Build the Solr field array for this record.
  *
  * Starts from the parent's fields and adds author, publication date, online
  * link, thumbnail, permanent address and data source fields.
  *
  * @return string[]
  */
 public function toSolrArray()
 {
     $data = parent::toSolrArray();

     // Nonstandard author fields: the first 'author' value becomes the main
     // author, the remaining ones are placed before any existing author2 values
     $authors = $this->getValues('author');
     if ($authors) {
         $data['author'] = array_shift($authors);
         $data['author2'] = isset($data['author2'])
             ? array_merge($authors, $data['author2'])
             : $authors;
     }

     if (isset($data['publishDate'])) {
         $data['main_date_str'] = MetadataUtils::extractYear($data['publishDate']);
         $mainDate = $this->getPublicationYear() . '-01-01T00:00:00Z';
         $data['main_date'] = $this->validateDate($mainDate);
     }

     $range = $this->getPublicationDateRange();
     if ($range) {
         $numericRange = MetadataUtils::dateRangeToNumeric($range);
         $data['publication_sdaterange'] = $numericRange;
         $data['search_sdaterange_mv'][] = $numericRange;

         $stringRange = MetadataUtils::dateRangeToStr($range);
         $data['publication_daterange'] = $stringRange;
         $data['search_daterange_mv'][] = $stringRange;
     }

     // Relations that look like URLs are indexed as online links
     foreach ($this->doc->relation as $relation) {
         $url = (string) $relation;
         // Ignore too long fields
         if (strlen($url) > 4096) {
             continue;
         }
         // Require at least one dot surrounded by valid characters or a
         // familiar scheme
         if (!preg_match('/[A-Za-z0-9]\\.[A-Za-z0-9]/', $url)
             && !preg_match('/^(http|ftp)s?:\\/\\//', $url)
         ) {
             continue;
         }
         $data['online_boolean'] = true;
         $data['online_str_mv'] = $this->source;
         $data['online_urls_str_mv'][] = json_encode(
             ['url' => $url, 'text' => '', 'source' => $this->source]
         );
     }

     // Files are always indexed as online links; the href attribute is
     // preferred over the element content as the URL
     foreach ($this->doc->file as $file) {
         $attrs = $file->attributes();
         $href = (string) $attrs->href;
         $url = $href ? $href : (string) $file;
         $data['online_boolean'] = true;
         $data['online_str_mv'] = $this->source;
         $data['online_urls_str_mv'][] = json_encode(
             ['url' => $url, 'text' => (string) $attrs->name, 'source' => $this->source]
         );
         // The first file in the THUMBNAIL bundle provides the thumbnail
         if (!isset($data['thumbnail']) && strcasecmp($attrs->bundle, 'THUMBNAIL') == 0) {
             $data['thumbnail'] = $url;
         }
     }

     if ($this->doc->permaddress) {
         $data['url'] = (string) $this->doc->permaddress[0];
     }

     $data['source_str_mv'] = $this->source;
     $data['datasource_str_mv'] = $this->source;

     return $data;
 }
コード例 #2
0
ファイル: NdlDcRecord.php プロジェクト: grharry/RecordManager
 /**
  * Return fields to be indexed in Solr (an alternative to an XSL transformation)
  *
  * Extends the parent's fields with main date, publication date range, a
  * single language code and data source fields.
  *
  * @return string[]
  */
 public function toSolrArray()
 {
     $data = parent::toSolrArray();
     if (isset($data['publishDate'])) {
         $data['main_date_str'] = MetadataUtils::extractYear($data['publishDate']);
         $data['main_date'] = $this->validateDate($this->getPublicationYear() . '-01-01T00:00:00Z');
     }
     if ($range = $this->getPublicationDateRange()) {
         // Class name is written with its canonical casing: PHP resolves class
         // names case-insensitively once loaded, but autoloaders may not
         $data['search_sdaterange_mv'][] = $data['publication_sdaterange'] = MetadataUtils::dateRangeToNumeric($range);
         $data['search_daterange_mv'][] = $data['publication_daterange'] = MetadataUtils::dateRangeToStr($range);
     }
     // language, take only first. Only two or three lowercase letter codes are
     // accepted, and the codes 'zxx' and 'und' are ignored.
     $languages = array_filter(explode(' ', (string) $this->doc->language), function ($value) {
         return preg_match('/^[a-z]{2,3}$/', $value) && !in_array($value, ['zxx', 'und'], true);
     });
     // array_shift() returns null when no acceptable code was found
     $data['language'] = array_shift($languages);
     $data['source_str_mv'] = $this->source;
     $data['datasource_str_mv'] = $this->source;
     return $data;
 }
コード例 #3
0
ファイル: EseRecord.php プロジェクト: grharry/RecordManager
 /**
  * Dedup: Collect ISBNs from the identifier elements
  *
  * Dashes are removed from each identifier before matching. Ten character
  * matches are converted with MetadataUtils::isbn10to13(); other matches are
  * returned as found. Duplicates are removed.
  *
  * @return string[]
  */
 public function getISBNs()
 {
     $isbns = [];
     foreach ($this->doc->identifier as $identifier) {
         $candidate = str_replace('-', '', $identifier);
         if (preg_match('{([0-9]{9,12}[0-9xX])}', $candidate, $found)) {
             $isbn = strlen($found[1]) == 10
                 ? MetadataUtils::isbn10to13($found[1])
                 : $found[1];
             // Skip anything the conversion turned into an empty value
             if ($isbn) {
                 $isbns[] = $isbn;
             }
         }
     }
     return array_values(array_unique($isbns));
 }
コード例 #4
0
ファイル: BaseRecord.php プロジェクト: grharry/RecordManager
 /**
  * Pass a date string through only if MetadataUtils accepts it as ISO 8601
  *
  * @param string $dateString Date string
  *
  * @return string The given date string if valid, otherwise an empty string
  */
 protected function validateDate($dateString)
 {
     $isValid = MetadataUtils::validateISO8601Date($dateString) !== false;
     return $isValid ? $dateString : '';
 }
コード例 #5
0
ファイル: MarcRecord.php プロジェクト: grharry/RecordManager
 /**
  * Get genre facet fields
  *
  * Reads subfield v of fields 600, 610, 611, 630, 648, 650 and 651 as well as
  * subfields a and v of field 655, and passes the result through
  * MetadataUtils::ucFirst().
  *
  * @return string[] Genres
  */
 protected function getGenreFacets()
 {
     $fieldSpecs = [];
     foreach (['600', '610', '611', '630', '648', '650', '651'] as $tag) {
         $fieldSpecs[] = [MarcRecord::GET_NORMAL, $tag, ['v' => 1]];
     }
     $fieldSpecs[] = [MarcRecord::GET_NORMAL, '655', ['a' => 1]];
     $fieldSpecs[] = [MarcRecord::GET_NORMAL, '655', ['v' => 1]];

     $genres = $this->getFieldsSubfields($fieldSpecs, false, true, true);
     return MetadataUtils::ucFirst($genres);
 }
コード例 #6
0
 /**
  * Process a complete record set harvested e.g. from MetaLib
  *
  * The harvested set is compared with the non-deleted database records of the
  * source: database records missing from the set are stored as deleted, records
  * whose serialized data differs are updated, and harvested records not found in
  * the database are added.
  *
  * @param string   $source           Source ID
  * @param string[] $harvestedRecords Array of records
  *
  * @return void
  */
 protected function processFullRecordSet($source, $harvestedRecords)
 {
     $this->log->log('processFullRecordSet', "[{$source}] Processing complete record set");

     // Key the harvested records by "<source>.<record id>"
     $pending = [];
     foreach ($harvestedRecords as $harvested) {
         $recordId = RecordFactory::createRecord('marc', $harvested, '', $source)->getID();
         $pending[$source . '.' . $recordId] = $harvested;
     }

     $this->log->log('processFullRecordSet', "[{$source}] Merging results with the records in database");
     $deletedCount = 0;
     $unchangedCount = 0;
     $changedCount = 0;
     $addedCount = 0;

     $cursor = $this->db->record
         ->find(['deleted' => false, 'source_id' => $source])
         ->timeout($this->cursorTimeout);
     foreach ($cursor as $dbRecord) {
         $id = $dbRecord['_id'];
         if (isset($pending[$id])) {
             // Compare the serialized harvested record with the stored data
             $marc = RecordFactory::createRecord('marc', $pending[$id], '', $source);
             if ($marc->serialize() != MetadataUtils::getRecordData($dbRecord, false)) {
                 $this->storeRecord($id, false, $pending[$id]);
                 ++$changedCount;
             } else {
                 ++$unchangedCount;
             }
             // Handled; whatever remains afterwards is new
             unset($pending[$id]);
         } else {
             // Record not in harvested records, mark deleted
             $this->storeRecord($id, true, '');
             ++$deletedCount;
         }
     }

     $this->log->log('processFullRecordSet', "[{$source}] Adding new records");
     foreach ($pending as $id => $newRecord) {
         $this->storeRecord($id, false, $newRecord);
         ++$addedCount;
     }

     $this->log->log('processFullRecordSet', "[{$source}] {$addedCount} new, {$changedCount} changed, {$unchangedCount} unchanged and " . "{$deletedCount} deleted records processed");
 }
コード例 #7
0
 /**
  * Split title to main title and description. Tries to find the first sentence
  * break where the title can be split.
  *
  * A split is only considered after a word that ends with a period while no
  * parentheses or brackets are open.
  *
  * @param string $title Title to split
  *
  * @return null|string Null if title was not split, otherwise the initial
  * title part
  */
 public static function splitTitle($title)
 {
     $i = 0;
     $parenLevel = 0;
     $bracketLevel = 0;
     // Make sure the title has single spaces for whitespace
     $title = preg_replace('/\\s+/', ' ', $title);
     $titleWords = explode(' ', $title);
     foreach ($titleWords as $word) {
         // After the increment $i is the number of words seen so far, which is
         // also the index of the next word in $titleWords
         ++$i;
         $parenLevel += substr_count($word, '(');
         $parenLevel -= substr_count($word, ')');
         $bracketLevel += substr_count($word, '[');
         $bracketLevel -= substr_count($word, ']');
         if ($parenLevel == 0 && $bracketLevel == 0) {
             // Try to avoid splitting at short words or the very beginning
             if (substr($word, -1) == '.' && strlen($word) > 2 && ($i > 1 || strlen($word) > 4)) {
                 // Verify that the word is strippable (not abbreviation etc.)
                 $leadStripped = MetadataUtils::stripLeadingPunctuation($word);
                 $stripped = MetadataUtils::stripTrailingPunctuation($leadStripped);
                 // Cast to string since substr() may return false for an empty
                 // word on older PHP versions
                 $nextFirst = isset($titleWords[$i]) ? (string) substr($titleWords[$i], 0, 1) : '';
                 // 1.) There has to be something following this word. This is
                 //     compared against an empty string so that a following
                 //     word starting with "0" is not mistaken for nothing.
                 // 2.) The trailing period must be strippable or end with a year.
                 // 3.) Next word has to start with a capital or digit
                 // 4.) Not something like 12-p.
                 // 5.) Not initials like A.N.
                 if ($nextFirst !== '' && ($leadStripped != $stripped || preg_match('/^\\d{4}\\.$/', $word)) && (is_numeric($nextFirst) || !ctype_lower($nextFirst)) && !preg_match('/.+\\-\\w{1,2}\\.$/', $word) && !preg_match('/^\\w\\.\\w\\.$/', $word)) {
                     // array_slice() leaves the array being iterated untouched
                     return MetadataUtils::stripTrailingPunctuation(implode(' ', array_slice($titleWords, 0, $i)));
                 }
             }
         }
     }
     return null;
 }
コード例 #8
0
 /**
  * Create a sort key
  *
  * The key is assembled from the upper-cased letters, the digits prefixed with
  * their digit count, the decimal part, the suffix and the cutter parts, with
  * spaces between the pieces.
  *
  * @return string
  */
 public function getSortKey()
 {
     $sortKey = strtoupper($this->letters);

     if ($this->digits) {
         $separator = $sortKey ? ' ' : '';
         // The length of the integer value precedes the digits themselves
         $sortKey .= $separator . strlen((int) $this->digits) . $this->digits;
     }

     $sortKey .= $this->decimal;

     if ($this->suffix) {
         if ($sortKey) {
             // A suffix starting with a letter gets an underscore marker
             $sortKey .= ctype_alpha($this->suffix[0]) ? ' _' : ' ';
         }
         $sortKey .= MetadataUtils::createSortableString($this->suffix);
     }

     if ($this->cutter) {
         // NOTE(review): preg_split() returns the text between the
         // letter+digits matches, not the matches themselves - confirm that
         // this is intended
         $cutterParts = preg_split('/[A-Za-z]\\d+/', $this->cutter);
         foreach ($cutterParts as $cutterPart) {
             if ($sortKey) {
                 $sortKey .= ' ';
             }
             $sortKey .= MetadataUtils::createSortableString($cutterPart);
         }
     }

     return $sortKey;
 }
コード例 #9
0
 /**
  * Get an array of all fields relevant to allfields search
  *
  * Collects ISBNs from field 020 (as found and normalized), the subfields of
  * fields 100-840 (except 336 and 337), 856, 880 and 979, strips leading and
  * trailing punctuation and removes duplicates.
  *
  * @return string[]
  */
 protected function getAllFields()
 {
     // Subfield filter passed to getAllSubfields() unless the tag has its own
     $defaultFilter = ['0' => 1, '6' => 1, '8' => 1];
     $filterByTag = [
         '650' => ['0' => 1, '2' => 1, '6' => 1, '8' => 1],
         '773' => ['0' => 1, '6' => 1, '7' => 1, '8' => 1, 'w' => 1],
         '856' => ['0' => 1, '6' => 1, '8' => 1, 'q' => 1],
         '979' => ['0' => 1, 'a' => 1, 'f' => 1]
     ];

     $collected = [];

     // Include ISBNs, also normalized if possible
     foreach ($this->getFields('020') as $isbnField) {
         foreach ($this->getSubfieldsArray($isbnField, ['a' => 1, 'z' => 1]) as $isbn) {
             $collected[] = $isbn;
             $normalized = MetadataUtils::normalizeISBN($isbn);
             if ($normalized) {
                 $collected[] = $normalized;
             }
         }
     }

     foreach ($this->fields as $tag => $fields) {
         $inMainRange = $tag >= 100 && $tag < 841 && $tag != 336 && $tag != 337;
         if (!$inMainRange && $tag != 856 && $tag != 880 && $tag != 979) {
             continue;
         }
         $filter = isset($filterByTag[$tag]) ? $filterByTag[$tag] : $defaultFilter;
         foreach ($fields as $field) {
             $subfields = $this->getAllSubfields($field, $filter);
             if ($subfields) {
                 $collected = array_merge($collected, $subfields);
             }
         }
     }

     $stripPunctuation = function ($str) {
         return MetadataUtils::stripLeadingPunctuation(MetadataUtils::stripTrailingPunctuation($str));
     };
     $collected = array_map($stripPunctuation, $collected);

     return array_values(array_unique($collected));
 }
コード例 #10
0
ファイル: LidoRecord.php プロジェクト: grharry/RecordManager
 /**
  * Attempt to parse a string (in finnish) into a normalized date range.
  *
  * Recognized forms, tried in this order:
  * - "YYYY - YYYY" (a space is required after the dash)
  * - "YYYY-M-D"
  * - "D.M.YYYY" (the separator may be any single character)
  * - a two to four digit year followed by "?", giving a range of +/- 3 years
  * - a two to four digit year
  *
  * A two-digit start year is placed in the 1900s.
  *
  * TODO: complicated normalizations like this should preferably reside within
  * their own, separate component which should allow modification of the
  * algorithm by methods other than hard-coding rules into source.
  *
  * @param string $input Date range
  *
  * @return string|null Two ISO 8601 dates separated with a comma on success, and
  * null on failure (unrecognized input, a year after the current year or an
  * invalid date)
  */
 protected function parseDateRange($input)
 {
     $input = trim(strtolower($input));
     if (preg_match('/(\\d\\d\\d\\d) ?- (\\d\\d\\d\\d)/', $input, $matches) > 0) {
         $startDate = $matches[1];
         $endDate = $matches[2];
     } elseif (preg_match('/(\\d\\d\\d\\d)-(\\d\\d?)-(\\d\\d?)/', $input, $matches) > 0) {
         $year = $matches[1];
         $month = sprintf('%02d', $matches[2]);
         $day = sprintf('%02d', $matches[3]);
         $startDate = $year . '-' . $month . '-' . $day . 'T00:00:00Z';
         $endDate = $year . '-' . $month . '-' . $day . 'T23:59:59Z';
         // Full timestamps already built, skip the year handling below
         $noprocess = true;
     } elseif (preg_match('/(\\d\\d?)\\s*.\\s*(\\d\\d?)\\s*.\\s*(\\d\\d\\d\\d)/', $input, $matches) > 0) {
         $year = $matches[3];
         $month = sprintf('%02d', $matches[2]);
         $day = sprintf('%02d', $matches[1]);
         $startDate = $year . '-' . $month . '-' . $day . 'T00:00:00Z';
         $endDate = $year . '-' . $month . '-' . $day . 'T23:59:59Z';
         $noprocess = true;
     } elseif (preg_match('/(\\d?\\d?\\d\\d) ?\\?/', $input, $matches) > 0) {
         // Uncertain year: widen the range by three years in both directions
         $year = $matches[1];
         $startDate = $year - 3;
         $endDate = $year + 3;
     } elseif (preg_match('/(\\d?\\d?\\d\\d)/', $input, $matches) > 0) {
         $year = $matches[1];
         $startDate = $year;
         $endDate = $year;
     } else {
         return null;
     }
     if (strlen($startDate) == 2) {
         $startDate = 1900 + (int) $startDate;
     }
     if (strlen($endDate) == 2) {
         // Use the century of the start year for a two-digit end year
         $century = substr($startDate, 0, 2) . '00';
         $endDate = (int) $century + (int) $endDate;
     }
     if (empty($noprocess)) {
         $startDate = $startDate . '-01-01T00:00:00Z';
         $endDate = $endDate . '-12-31T23:59:59Z';
     }
     // Trying to index dates into the future? I don't think so...
     // Compare the leading year numerically. Comparing the whole timestamp
     // string with the year string would reject every date in the current year,
     // since e.g. '2024-01-01T00:00:00Z' sorts after '2024'.
     $yearNow = (int) date('Y');
     if ((int) $startDate > $yearNow || (int) $endDate > $yearNow) {
         return null;
     }
     if (MetadataUtils::validateISO8601Date($startDate) === false || MetadataUtils::validateISO8601Date($endDate) === false) {
         return null;
     }
     return "{$startDate},{$endDate}";
 }
コード例 #11
0
ファイル: EadRecord.php プロジェクト: grharry/RecordManager
 /**
  * Return fields to be indexed in Solr
  *
  * Builds the field array directly from the XML document: identifiers,
  * description, authors, geographic names, topics, format, institution, titles,
  * languages, physical description, thumbnail and hierarchy information.
  *
  * @param boolean $prependTitleWithSubtitle If true and title_sub differs from
  * title_short, title is formed by combining title_sub and title_short
  *
  * @return string[]
  */
 public function toSolrArray($prependTitleWithSubtitle)
 {
     $data = [];
     $doc = $this->doc;
     $data['ctrlnum'] = (string) $this->doc->attributes()->{'id'};
     $data['fullrecord'] = MetadataUtils::trimXMLWhitespace($doc->asXML());
     $data['allfields'] = $this->getAllFields($doc);
     if ($doc->scopecontent) {
         if ($doc->scopecontent->p) {
             // Join all p-elements into a flat string.
             $desc = [];
             foreach ($doc->scopecontent->p as $p) {
                 $desc[] = trim((string) $p);
             }
             $desc = implode('   /   ', $desc);
         } else {
             $desc = (string) $doc->scopecontent;
         }
         $data['description'] = $desc;
     }
     // Person names first, then corporate names; a lone dash is treated as a
     // placeholder for person names and skipped
     $authors = [];
     if ($names = $doc->xpath('controlaccess/persname')) {
         foreach ($names as $name) {
             if (trim((string) $name) !== '-') {
                 $authors[] = trim((string) $name);
             }
         }
     }
     if ($names = $doc->xpath('controlaccess/corpname')) {
         foreach ($names as $name) {
             $authors[] = trim((string) $name);
         }
     }
     if ($authors) {
         $data['author'] = array_shift($authors);
         $data['author-letter'] = $data['author'];
     }
     // Whatever is left after taking the main author
     if ($authors) {
         $data['author2'] = $authors;
     }
     if ($doc->did->origination) {
         $data['author_additional'] = trim((string) $doc->did->origination->corpname);
     }
     if ($geoNames = $doc->xpath('controlaccess/geogname')) {
         $names = [];
         foreach ($geoNames as $name) {
             if (trim((string) $name) !== '-') {
                 $names[] = trim((string) $name);
             }
         }
         $data['geographic'] = $data['geographic_facet'] = $names;
     }
     if ($subjects = $doc->xpath('controlaccess/subject')) {
         $topics = [];
         foreach ($subjects as $subject) {
             if (trim((string) $subject) !== '-') {
                 $topics[] = trim((string) $subject);
             }
         }
         $data['topic'] = $data['topic_facet'] = $topics;
     }
     // Format is the first genreform if available, otherwise the level attribute
     $genre = $doc->xpath('controlaccess/genreform');
     $data['format'] = (string) ($genre ? $genre[0] : $doc->attributes()->level);
     if (isset($doc->did->repository)) {
         // The ternary is parenthesized so that the cast applies to the selected
         // element; a cast binds tighter than the ternary operator and would
         // otherwise only convert the isset() result
         $data['institution'] = (string) (isset($doc->did->repository->corpname) ? $doc->did->repository->corpname : $doc->did->repository);
     }
     $data['title_sub'] = '';
     switch ($data['format']) {
         case 'fonds':
             break;
         case 'collection':
             break;
         case 'series':
         case 'subseries':
             $data['title_sub'] = (string) $doc->did->unitid;
             break;
         default:
             $data['title_sub'] = (string) $doc->did->unitid;
             if ($doc->{'add-data'}->parent) {
                 $data['series'] = (string) $doc->{'add-data'}->parent->attributes()->unittitle;
             }
             break;
     }
     $data['title_short'] = (string) $doc->did->unittitle;
     $data['title'] = '';
     if ($prependTitleWithSubtitle) {
         if ($data['title_sub'] && $data['title_sub'] != $data['title_short']) {
             $data['title'] = $data['title_sub'] . ' ';
         }
     }
     $data['title'] .= $data['title_short'];
     $data['title_full'] = $data['title_sort'] = $data['title'];
     $data['title_sort'] = mb_strtolower(MetadataUtils::stripLeadingPunctuation($data['title_sort']), 'UTF-8');
     if ($languages = $doc->did->xpath('langmaterial/language')) {
         foreach ($languages as $lang) {
             if (isset($lang->attributes()->langcode)) {
                 $langCode = trim((string) $lang->attributes()->langcode);
                 if ($langCode != '') {
                     $data['language'][] = $langCode;
                 }
             }
         }
     }
     if ($extents = $doc->did->xpath('physdesc/extent')) {
         foreach ($extents as $extent) {
             if (trim((string) $extent) !== '-') {
                 $data['physical'][] = (string) $extent;
             }
         }
     }
     // Only query the thumbnail when a daogrp element actually exists; xpath()
     // must not be called on a missing node
     if (isset($this->doc->did->daogrp)) {
         $nodes = $this->doc->did->daogrp->xpath('daoloc[@role="image_thumbnail"]');
         if ($nodes) {
             // store first thumbnail
             $node = $nodes[0];
             if (isset($node->attributes()->href)) {
                 $data['thumbnail'] = (string) $node->attributes()->href;
             }
         }
     }
     $data['hierarchytype'] = 'Default';
     if ($this->doc->{'add-data'}->archive) {
         $archiveAttr = $this->doc->{'add-data'}->archive->attributes();
         $data['hierarchy_top_id'] = (string) $archiveAttr->{'id'};
         $data['hierarchy_top_title'] = (string) $archiveAttr->title;
         if ($archiveAttr->subtitle) {
             $data['hierarchy_top_title'] .= ' : ' . (string) $archiveAttr->subtitle;
         }
         $data['allfields'][] = $data['hierarchy_top_title'];
         if ($archiveAttr->sequence) {
             $data['hierarchy_sequence'] = (string) $archiveAttr->sequence;
         }
     }
     if ($this->doc->{'add-data'}->{'parent'}) {
         $data['hierarchy_parent_id'] = (string) $this->doc->{'add-data'}->{'parent'}->attributes()->{'id'};
         $data['allfields'][] = $data['hierarchy_parent_title'] = (string) $this->doc->{'add-data'}->{'parent'}->attributes()->title;
     } else {
         // No parent: this record is the top of its own hierarchy
         $data['is_hierarchy_id'] = $data['hierarchy_top_id'] = $this->getID();
         $data['is_hierarchy_title'] = $data['hierarchy_top_title'] = (string) $doc->did->unittitle;
     }
     return $data;
 }
コード例 #12
0
 /**
  * Fetch the component parts of a host record, ordered by the sort key that
  * MetadataUtils::createIdSortKey() derives from each record's _id
  *
  * @param string $sourceId     Source ID
  * @param string $hostRecordId Host record ID (doesn't include source id)
  *
  * @return array Array of component parts
  */
 protected function getComponentPartsSorted($sourceId, $hostRecordId)
 {
     $query = ['source_id' => $sourceId, 'host_record_id' => $hostRecordId];
     $cursor = $this->db->record->find($query)->timeout($this->cursorTimeout);

     $partsBySortKey = [];
     foreach ($cursor as $part) {
         $sortKey = MetadataUtils::createIdSortKey($part['_id']);
         $partsBySortKey[$sortKey] = $part;
     }
     ksort($partsBySortKey);

     return array_values($partsBySortKey);
 }
コード例 #13
0
ファイル: SolrUpdater.php プロジェクト: grharry/RecordManager
 /**
  * Create Solr array for the given record
  *
  * Creates the metadata record, decides how component parts are handled, builds
  * the field array (via XSLT or the record driver), adds hierarchy links, extra
  * fields, mapped values, building and hierarchical facet values, and finally
  * normalizes the values and drops empty fields.
  *
  * @param array   $record           Mongo record
  * @param integer $mergedComponents Number of component parts merged to the
  * record (passed by reference and incremented here)
  *
  * @return string[]|false Field array, or false if the record is a hidden
  * component part and the 'indexMergedParts' setting is not enabled
  * @throws Exception If no settings are found for the data source of the record
  */
 protected function createSolrArray($record, &$mergedComponents)
 {
     global $configArray;
     $metadataRecord = RecordFactory::createRecord($record['format'], MetadataUtils::getRecordData($record, true), $record['oai_id'], $record['source_id']);
     $source = $record['source_id'];
     if (!isset($this->settings[$source])) {
         // Try to reload data source settings as they might have been updated
         // during a long run
         $this->loadDatasources();
         if (!isset($this->settings[$source])) {
             $this->log->log('createSolrArray', "No settings found for data source '{$source}', record " . "{$record['_id']}: " . $this->prettyPrint($record, true), Logger::FATAL);
             throw new Exception("No settings found for data source '{$source}'");
         }
     }
     $settings = $this->settings[$source];
     // Decide whether this record is a component part that is hidden because
     // it gets merged into its host record
     $hiddenComponent = false;
     if (isset($record['host_record_id'])) {
         if ($settings['componentParts'] == 'merge_all') {
             $hiddenComponent = true;
         } elseif ($settings['componentParts'] == 'merge_non_articles' || $settings['componentParts'] == 'merge_non_earticles') {
             $format = $metadataRecord->getFormat();
             if (!in_array($format, $this->allArticleFormats)) {
                 $hiddenComponent = true;
             } elseif (in_array($format, $this->articleFormats)) {
                 $hiddenComponent = true;
             }
         }
     }
     if ($hiddenComponent && !$settings['indexMergedParts']) {
         return false;
     }
     $hasComponentParts = false;
     $components = null;
     if (!isset($record['host_record_id'])) {
         // Fetch info whether component parts exist and need to be merged
         // empty() also covers a record without the linking_id key
         if (empty($record['linking_id'])) {
             $this->log->log('createSolrArray', "linking_id missing for record '{$record['_id']}'", Logger::ERROR);
         } else {
             $components = $this->db->record->find(['source_id' => $record['source_id'], 'host_record_id' => $record['linking_id'], 'deleted' => false])->timeout($this->cursorTimeout);
             $hasComponentParts = $components->hasNext();
             $format = $metadataRecord->getFormat();
             $merge = false;
             if ($settings['componentParts'] == 'merge_all') {
                 $merge = true;
             } elseif (!in_array($format, $this->allJournalFormats)) {
                 $merge = true;
             } elseif (in_array($format, $this->journalFormats) && $settings['componentParts'] == 'merge_non_earticles') {
                 $merge = true;
             }
             if (!$merge) {
                 unset($components);
             }
         }
     }
     if (isset($components)) {
         $mergedComponents += $metadataRecord->mergeComponentParts($components);
     }
     if (isset($settings['solrTransformationXSLT'])) {
         $params = ['source_id' => $source, 'institution' => $settings['institution'], 'format' => $settings['format'], 'id_prefix' => $settings['idPrefix']];
         $data = $settings['solrTransformationXSLT']->transformToSolrArray($metadataRecord->toXML(), $params);
     } else {
         $prependTitleWithSubtitle = isset($settings['prepend_title_with_subtitle']) ? $settings['prepend_title_with_subtitle'] : true;
         $data = $metadataRecord->toSolrArray($prependTitleWithSubtitle);
         $this->enrich($source, $settings, $metadataRecord, $data);
     }
     $data['id'] = $record['_id'];
     // Record links between host records and component parts
     if ($metadataRecord->getIsComponentPart()) {
         $hostRecord = null;
         if (isset($record['host_record_id']) && $this->db) {
             $hostRecord = $this->db->record->find(['source_id' => $record['source_id'], 'linking_id' => $record['host_record_id']])->limit(-1)->timeout($this->cursorTimeout)->getNext();
         }
         if (!$hostRecord) {
             if (isset($record['host_record_id'])) {
                 $this->log->log('createSolrArray', "Host record '" . $record['host_record_id'] . "' not found for record '" . $record['_id'] . "'", Logger::WARNING);
             }
             $data['container_title'] = $metadataRecord->getContainerTitle();
         } else {
             $data['hierarchy_parent_id'] = $hostRecord['_id'];
             $hostMetadataRecord = RecordFactory::createRecord($hostRecord['format'], MetadataUtils::getRecordData($hostRecord, true), $hostRecord['oai_id'], $hostRecord['source_id']);
             $data['container_title'] = $data['hierarchy_parent_title'] = $hostMetadataRecord->getTitle();
         }
         $data['container_volume'] = $metadataRecord->getVolume();
         $data['container_issue'] = $metadataRecord->getIssue();
         $data['container_start_page'] = $metadataRecord->getStartPage();
         $data['container_reference'] = $metadataRecord->getContainerReference();
     } else {
         // Add prefixes to hierarchy linking fields
         foreach (['hierarchy_top_id', 'hierarchy_parent_id', 'is_hierarchy_id'] as $field) {
             if (isset($data[$field]) && $data[$field]) {
                 $data[$field] = $record['source_id'] . '.' . $data[$field];
             }
         }
     }
     if ($hasComponentParts) {
         $data['is_hierarchy_id'] = $record['_id'];
         $data['is_hierarchy_title'] = $metadataRecord->getTitle();
     }
     if (!isset($data['institution'])) {
         $data['institution'] = $settings['institution'];
     }
     // Each extra field is a single-element array of field name => value. The
     // value is appended if the field already exists.
     foreach ($settings['extraFields'] as $extraField) {
         $fieldName = key($extraField);
         $fieldValue = current($extraField);
         if (isset($data[$fieldName])) {
             if (!is_array($data[$fieldName])) {
                 $data[$fieldName] = [$data[$fieldName]];
             }
             $data[$fieldName][] = $fieldValue;
         } else {
             $data[$fieldName] = $fieldValue;
         }
     }
     // Map field values according to any mapping files
     foreach ($settings['mappingFiles'] as $field => $map) {
         if (isset($data[$field]) && !empty($data[$field])) {
             if (is_array($data[$field])) {
                 // NOTE(review): $newValues is overwritten on every iteration,
                 // so only the mapping of the last mappable value is kept for a
                 // multi-valued field - confirm whether the mapped values
                 // should be accumulated instead
                 $newValues = null;
                 foreach ($data[$field] as $value) {
                     if (isset($map[$value])) {
                         $newValues = $map[$value];
                     } elseif (isset($map['##default'])) {
                         $newValues = $map['##default'];
                     }
                 }
                 if (null !== $newValues) {
                     if (is_array($newValues)) {
                         $data[$field] = array_values(array_unique($newValues));
                     } else {
                         $data[$field] = $newValues;
                     }
                 }
             } else {
                 if (isset($map[$data[$field]])) {
                     $data[$field] = $map[$data[$field]];
                 } elseif (isset($map['##default'])) {
                     $data[$field] = $map['##default'];
                 }
             }
         } elseif (isset($map['##empty'])) {
             $data[$field] = $map['##empty'];
         } elseif (isset($map['##emptyarray'])) {
             $data[$field] = [$map['##emptyarray']];
         }
     }
     // Special case: Special values for building (institution/location).
     // Used by default if building is set as a hierarchical facet.
     if ($this->buildingHierarchy || isset($settings['institutionInBuilding'])) {
         $useInstitution = isset($settings['institutionInBuilding']) ? $settings['institutionInBuilding'] : 'institution';
         switch ($useInstitution) {
             case 'driver':
                 $institutionCode = $data['institution'];
                 break;
             case 'none':
                 $institutionCode = '';
                 break;
             case 'source':
                 $institutionCode = $source;
                 break;
             case 'institution/source':
                 $institutionCode = $settings['institution'] . '/' . $source;
                 break;
             default:
                 $institutionCode = $settings['institution'];
                 break;
         }
         if ($institutionCode) {
             if (isset($data['building']) && $data['building']) {
                 if (is_array($data['building'])) {
                     foreach ($data['building'] as &$building) {
                         // Allow also empty values that might result from
                         // mapping tables
                         if ($building !== '') {
                             $building = "{$institutionCode}/{$building}";
                         }
                     }
                     // Break the reference to the last array element
                     unset($building);
                 } else {
                     $data['building'] = $institutionCode . '/' . $data['building'];
                 }
             } else {
                 $data['building'] = [$institutionCode];
             }
         }
     }
     // Hierarchical facets: each value "a/b" is expanded to "0/a/" and "1/a/b/"
     if (isset($configArray['Solr']['hierarchical_facets'])) {
         foreach ($configArray['Solr']['hierarchical_facets'] as $facet) {
             if (!isset($data[$facet])) {
                 continue;
             }
             $array = [];
             if (!is_array($data[$facet])) {
                 $data[$facet] = [$data[$facet]];
             }
             foreach ($data[$facet] as $datavalue) {
                 if ($datavalue === '') {
                     continue;
                 }
                 $values = explode('/', $datavalue);
                 $hierarchyString = '';
                 for ($i = 0; $i < count($values); $i++) {
                     $hierarchyString .= '/' . $values[$i];
                     $array[] = $i . $hierarchyString . '/';
                 }
             }
             $data[$facet] = $array;
         }
     }
     // Build allfields from all the other fields if it was not provided
     if (!isset($data['allfields'])) {
         $all = [];
         foreach ($data as $key => $field) {
             if (in_array($key, ['fullrecord', 'thumbnail', 'id', 'recordtype', 'ctrlnum'])) {
                 continue;
             }
             if (is_array($field)) {
                 $all = array_merge($all, $field);
             } else {
                 $all[] = $field;
             }
         }
         $data['allfields'] = MetadataUtils::array_iunique($all);
     }
     $data['first_indexed'] = MetadataUtils::formatTimestamp($record['created']->sec);
     $data['last_indexed'] = MetadataUtils::formatTimestamp($record['date']->sec);
     $data['recordtype'] = $record['format'];
     if (!isset($data['fullrecord'])) {
         $data['fullrecord'] = $metadataRecord->toXML();
     }
     if (!is_array($data['format'])) {
         $data['format'] = [$data['format']];
     }
     if (isset($configArray['Solr']['format_in_allfields']) && $configArray['Solr']['format_in_allfields']) {
         foreach ($data['format'] as $format) {
             // Replace numbers since they may be be considered word boundaries
             $data['allfields'][] = str_replace(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'], ['ax', 'bx', 'cx', 'dx', 'ex', 'fx', 'gx', 'hx', 'ix', 'jx'], MetadataUtils::normalize($format));
         }
     }
     if ($hiddenComponent) {
         $data['hidden_component_boolean'] = true;
     }
     // Normalize all values (fullrecord excepted) and drop empty ones
     foreach ($data as $key => &$values) {
         if (is_array($values)) {
             // The inner loop uses its own key variable. Reusing $key here
             // would make the unset() below target the wrong field so that
             // emptied arrays would never be removed.
             foreach ($values as $valueKey => &$value) {
                 $value = MetadataUtils::normalizeUnicode($value);
                 if (empty($value) || $value === 0 || $value === 0.0 || $value === '0') {
                     unset($values[$valueKey]);
                 }
             }
             // Break the reference to the last array element
             unset($value);
             $values = array_values(array_unique($values));
         } elseif ($key != 'fullrecord') {
             $values = MetadataUtils::normalizeUnicode($values);
         }
         if (empty($values) || $values === 0 || $values === 0.0 || $values === '0') {
             unset($data[$key]);
         }
     }
     // Break the reference to the last field
     unset($values);
     return $data;
 }
コード例 #14
0
 /**
  * Attempt to parse a free-text date expression (in Finnish) into a normalized
  * date range.
  *
  * The input is trimmed and lowercased and first checked for a handful of named
  * periods (e.g. "kivikausi" = Stone Age). Otherwise everything after the first
  * comma is dropped and the remainder is tried against an ordered chain of
  * patterns. The first pattern that matches wins, so the order of the branches
  * is significant.
  *
  * Branches that produce complete timestamps set $noprocess; the others produce
  * bare years that are padded to four digits and expanded to the first second
  * and last second of the year at the end.
  *
  * TODO: complicated normalizations like this should preferably reside within
  * their own, separate component which should allow modification of the algorithm
  * by methods other than hard-coding rules into source.
  *
  * @param string $input Date range
  *
  * @return string[]|null Two ISO 8601 timestamps (start, end), or null when the
  * input names an undated/unknown period, matches no pattern, cannot be turned
  * into a date, fails the future-date check or fails validation for both ends
  */
 protected function parseDateRange($input)
 {
     $input = trim(strtolower($input));
     // Named periods: Stone Age, Bronze Age, Iron Age, Middle Ages, "undated"
     // and "unknown" (the last two deliberately map to null)
     $dateMappings = ['kivikausi' => ['-8600-01-01T00:00:00Z', '-1501-12-31T23:59:59Z'], 'pronssikausi' => ['-1500-01-01T00:00:00Z', '-0501-12-31T23:59:59Z'], 'rautakausi' => ['-0500-01-01T00:00:00Z', '1299-12-31T23:59:59Z'], 'keskiaika' => ['1300-01-01T00:00:00Z', '1550-12-31T23:59:59Z'], 'ajoittamaton' => null, 'tuntematon' => null];
     foreach ($dateMappings as $str => $value) {
         if (strstr($input, $str)) {
             return $value;
         }
     }
     // Finnish month names (January..December) to two-digit month numbers
     $k = ['tammikuu' => '01', 'helmikuu' => '02', 'maaliskuu' => '03', 'huhtikuu' => '04', 'toukokuu' => '05', 'kesäkuu' => '06', 'heinäkuu' => '07', 'elokuu' => '08', 'syyskuu' => '09', 'lokakuu' => '10', 'marraskuu' => '11', 'joulukuu' => '12'];
     $imprecise = false;
     // Set by branches that already produce complete timestamps
     $noprocess = false;
     // Only the part before the first comma is considered
     list($input) = explode(',', $input, 2);
     if (preg_match('/(\\d\\d?)\\s*.\\s*(\\d\\d?)\\s*.\\s*(\\d\\d\\d\\d)\\s*-\\s*(\\d\\d?)\\s*.\\s*(\\d\\d?)\\s*.\\s*(\\d\\d\\d\\d)/', $input, $matches) > 0) {
         // d.m.yyyy - d.m.yyyy (the separator may be any single character)
         $startDate = sprintf('%04d-%02d-%02dT00:00:00Z', $matches[3], $matches[2], $matches[1]);
         $endDate = sprintf('%04d-%02d-%02dT23:59:59Z', $matches[6], $matches[5], $matches[4]);
         $noprocess = true;
     } elseif (preg_match('/(\\d\\d\\d\\d)\\s*-\\s*(\\d\\d?)\\s*.\\s*(\\d\\d?)\\s*.\\s*(\\d\\d\\d\\d)/', $input, $matches) > 0) {
         // yyyy - d.m.yyyy
         $startDate = sprintf('%04d-01-01T00:00:00Z', $matches[1]);
         $endDate = sprintf('%04d-%02d-%02dT23:59:59Z', $matches[4], $matches[3], $matches[2]);
         $noprocess = true;
     } elseif (preg_match('/(\\d\\d?)\\s*.\\s*(\\d\\d?)\\s*.\\s*(\\d\\d\\d\\d)\\s*-\\s*(\\d\\d\\d\\d)/', $input, $matches) > 0) {
         // d.m.yyyy - yyyy
         $startDate = sprintf('%04d-%02d-%02dT00:00:00Z', $matches[3], $matches[2], $matches[1]);
         $endDate = sprintf('%04d-12-31T23:59:59Z', $matches[4]);
         $noprocess = true;
     } elseif (preg_match('/(\\d\\d\\d\\d)\\s*.\\s*(\\d\\d?)\\s*.\\s*(\\d\\d?)\\s*-\\s*(\\d\\d\\d\\d)\\s*.\\s*(\\d\\d?)\\s*.\\s*(\\d\\d?)/', $input, $matches) > 0) {
         // yyyy.m.d - yyyy.m.d
         $startDate = sprintf('%04d-%02d-%02dT00:00:00Z', $matches[1], $matches[2], $matches[3]);
         $endDate = sprintf('%04d-%02d-%02dT23:59:59Z', $matches[4], $matches[5], $matches[6]);
         $noprocess = true;
     } elseif (preg_match('/(\\d\\d\\d\\d)(\\d\\d?)(\\d\\d?)\\s*-\\s*(\\d\\d\\d\\d)(\\d\\d?)(\\d\\d?)/', $input, $matches) > 0) {
         // yyyymmdd - yyyymmdd
         $startDate = sprintf('%04d-%02d-%02dT00:00:00Z', $matches[1], $matches[2], $matches[3]);
         $endDate = sprintf('%04d-%02d-%02dT23:59:59Z', $matches[4], $matches[5], $matches[6]);
         $noprocess = true;
     } elseif (preg_match('/(\\d\\d\\d\\d)(\\d\\d?)\\s*-\\s*(\\d\\d\\d\\d)(\\d\\d?)/', $input, $matches) > 0) {
         // yyyymm - yyyymm; the range ends on the last day of the end month
         $startDate = sprintf('%04d-%02d-01T00:00:00Z', $matches[1], $matches[2]);
         $endDate = sprintf('%04d-%02d-01', $matches[3], $matches[4]);
         try {
             $d = new DateTime($endDate);
         } catch (Exception $e) {
             global $logger;
             $logger->log('NdlLidoRecord', "Failed to parse date {$endDate}, record {$this->source}." . $this->getID(), Logger::ERROR);
             return null;
         }
         // 't' is the number of days in the month, i.e. its last day
         $endDate = $d->format('Y-m-t') . 'T23:59:59Z';
         $noprocess = true;
     } elseif (preg_match('/(\\d\\d\\d\\d)-(\\d\\d?)-(\\d\\d?)/', $input, $matches) > 0) {
         // This one needs to be before the lazy matcher below
         // yyyy-m-d, a single day
         $year = $matches[1];
         $month = sprintf('%02d', $matches[2]);
         $day = sprintf('%02d', $matches[3]);
         $startDate = $year . '-' . $month . '-' . $day . 'T00:00:00Z';
         $endDate = $year . '-' . $month . '-' . $day . 'T23:59:59Z';
         $noprocess = true;
     } elseif (preg_match('/(\\d\\d\\d\\d)\\s*-\\s*(\\d\\d\\d\\d)\\s*(-luvun|-l)\\s+(loppupuoli|loppu)/', $input, $matches) > 0) {
         // yyyy - yyyy-luvun loppu: year range ending at the end of a
         // century or decade
         $startDate = $matches[1];
         $endDate = $matches[2];
         if ($endDate % 100 == 0) {
             // Century
             $endDate += 99;
         } elseif ($endDate % 10 == 0) {
             // Decade
             $endDate += 9;
         }
     } elseif (preg_match('/(\\d?\\d?\\d\\d)\\s*(-|~)\\s*(\\d?\\d?\\d\\d)\\s*(-luku|-l)?\\s*(\\(?\\?\\)?)?/', $input, $matches) > 0) {
         // 1940-1960-luku
         // 1940-1960-l
         // 1940-60-l
         // 1930 - 1970-luku
         // 30-40-luku
         $startDate = $matches[1];
         $endDate = $matches[3];
         // Extend the end year to the end of the decade only when the
         // "-luku"/"-l" suffix is really present. The group is also set (as an
         // empty string) when only the trailing question mark matched, so
         // isset() is not a sufficient check here.
         if (!empty($matches[4])) {
             if ($endDate % 10 == 0) {
                 $endDate += 9;
             }
         }
         $imprecise = isset($matches[5]);
     } elseif (preg_match('/(\\d?\\d?\\d\\d)\\s+(tammikuu|helmikuu|maaliskuu|huhtikuu|toukokuu|kesäkuu|heinäkuu|elokuu|syyskuu|lokakuu|marraskuu|joulukuu)/', $input, $matches) > 0) {
         // Year followed by a Finnish month name: the whole month
         $year = $matches[1];
         $month = $k[$matches[2]];
         $startDate = $year . '-' . $month . '-01T00:00:00Z';
         $endDate = $year . '-' . $month . '-01';
         try {
             $d = new DateTime($endDate);
             $endDate = $d->format('Y-m-t') . 'T23:59:59Z';
         } catch (Exception $e) {
             global $logger;
             $logger->log('NdlLidoRecord', "Failed to parse date {$endDate}, record {$this->source}." . $this->getID(), Logger::ERROR);
             return null;
         }
         $noprocess = true;
     } elseif (preg_match('/(\\d\\d\\d\\d)(\\d\\d)(\\d\\d)/', $input, $matches) > 0) {
         // yyyymmdd, a single day
         $year = $matches[1];
         $month = sprintf('%02d', $matches[2]);
         $day = sprintf('%02d', $matches[3]);
         $startDate = $year . '-' . $month . '-' . $day . 'T00:00:00Z';
         $endDate = $year . '-' . $month . '-' . $day . 'T23:59:59Z';
         $noprocess = true;
     } elseif (preg_match('/(\\d\\d\\d\\d)(\\d\\d)/', $input, $matches) > 0) {
         // yyyymm, a whole month
         $year = $matches[1];
         $month = sprintf('%02d', $matches[2]);
         $startDate = $year . '-' . $month . '-01T00:00:00Z';
         $endDate = $year . '-' . $month . '-01';
         try {
             $d = new DateTime($endDate);
         } catch (Exception $e) {
             global $logger;
             $logger->log('NdlLidoRecord', "Failed to parse date {$endDate}, record {$this->source}." . $this->getID(), Logger::ERROR);
             return null;
         }
         $endDate = $d->format('Y-m-t') . 'T23:59:59Z';
         $noprocess = true;
     } elseif (preg_match('/(\\d\\d?)\\s*\\.\\s*(\\d\\d?)\\s*\\.\\s*(\\d\\d\\d\\d)/', $input, $matches) > 0) {
         // d.m.yyyy, a single day
         $year = $matches[3];
         $month = sprintf('%02d', $matches[2]);
         $day = sprintf('%02d', $matches[1]);
         $startDate = $year . '-' . $month . '-' . $day . 'T00:00:00Z';
         $endDate = $year . '-' . $month . '-' . $day . 'T23:59:59Z';
         $noprocess = true;
     } elseif (preg_match('/(\\d\\d?)\\s*\\.\\s*(\\d\\d\\d\\d)/', $input, $matches) > 0) {
         // m.yyyy, a whole month
         $year = $matches[2];
         $month = sprintf('%02d', $matches[1]);
         $startDate = $year . '-' . $month . '-01' . 'T00:00:00Z';
         $endDate = $year . '-' . $month . '-01';
         try {
             $d = new DateTime($endDate);
             $endDate = $d->format('Y-m-t') . 'T23:59:59Z';
         } catch (Exception $e) {
             global $logger;
             $logger->log('NdlLidoRecord', "Failed to parse date {$endDate}, record {$this->source}." . $this->getID(), Logger::ERROR);
             return null;
         }
         $noprocess = true;
     } elseif (preg_match('/(\\d?\\d?\\d\\d)\\s*-(luvun|luku)\\s+(alkupuolelta|alkupuoli|alku|alusta)/', $input, $matches) > 0) {
         // Beginning ("alku") of a century or decade
         $year = $matches[1];
         if ($year % 100 == 0) {
             // Century
             $startDate = $year;
             $endDate = $year + 29;
         } elseif ($year % 10 == 0) {
             // Decade
             $startDate = $year;
             $endDate = $year + 3;
         } else {
             // Uhh?
             $startDate = $year;
             $endDate = $year;
         }
     } elseif (preg_match('/(\\d?\\d?\\d\\d)\\s*-(luvun|luku)\\s+(puoliväli)/', $input, $matches) > 0) {
         // Middle ("puoliväli") of a century or decade
         $year = $matches[1];
         if ($year % 100 == 0) {
             // Century
             $startDate = $year + 29;
             $endDate = $year + 70;
         } elseif ($year % 10 == 0) {
             // Decade
             $startDate = $year + 3;
             $endDate = $year + 7;
         } else {
             // Uhh?
             $startDate = $year;
             $endDate = $year;
         }
     } elseif (preg_match('/(\\d?\\d?\\d\\d)\\s*(-luvun|-l)\\s+(loppupuoli|loppu|lopulta|loppupuolelta)/', $input, $matches) > 0) {
         // End ("loppu") of a century or decade
         $year = $matches[1];
         if ($year % 100 == 0) {
             // Century
             $startDate = $year + 70;
             $endDate = $year + 99;
         } elseif ($year % 10 == 0) {
             // Decade
             $startDate = $year + 7;
             $endDate = $year + 9;
         } else {
             $startDate = $year;
             $endDate = $year;
         }
     } elseif (preg_match('/(-?\\d?\\d?\\d\\d)\\s*-(luku|luvulta|l)/', $input, $matches) > 0) {
         // A whole century or decade, e.g. "1900-luku"
         $year = $matches[1];
         $startDate = $year;
         if ($year % 100 == 0) {
             $endDate = $year + 99;
         } elseif ($year % 10 == 0) {
             $endDate = $year + 9;
         } else {
             $endDate = $year;
         }
     } elseif (preg_match('/(\\d?\\d?\\d\\d)\\s*ekr.?\\s*\\-\\s*(\\d?\\d?\\d\\d)\\s*ekr.?/', $input, $matches) > 0) {
         // Both years marked "ekr" (BCE): negate both
         $startDate = -$matches[1];
         $endDate = -$matches[2];
     } elseif (preg_match('/(\\d?\\d?\\d\\d)\\s*ekr.?\\s*\\-\\s*(\\d?\\d?\\d\\d)\\s*jkr.?/', $input, $matches) > 0) {
         // From "ekr" (BCE) to "jkr" (CE): negate only the start year
         $startDate = -$matches[1];
         $endDate = $matches[2];
     } elseif (preg_match('/(-?\\d?\\d?\\d\\d) jälkeen/', $input, $matches) > 0) {
         // "<year> jälkeen" (after <year>): a ten-year span starting at the year
         $year = $matches[1];
         $startDate = $year;
         $endDate = $year + 9;
     } elseif (preg_match('/(-?\\d\\d\\d\\d)\\s*-\\s*(-?\\d\\d\\d\\d)/', $input, $matches) > 0) {
         // yyyy - yyyy, either year optionally negative
         $startDate = $matches[1];
         $endDate = $matches[2];
     } elseif (preg_match('/(-?\\d{1,4})\\s+-\\s+(-?\\d{1,4})/', $input, $matches) > 0) {
         // One to four digit years, optionally negative, with whitespace
         // around the dash (the quantifier must be written {1,4}; "{1-4}" is
         // treated as literal text by PCRE)
         $startDate = $matches[1];
         $endDate = $matches[2];
     } elseif (preg_match('/(-?\\d?\\d?\\d\\d)\\s*\\?/', $input, $matches) > 0) {
         // A single year followed by a question mark
         $year = $matches[1];
         $startDate = $year;
         $endDate = $year;
         $imprecise = true;
     } elseif (preg_match('/(-?\\d?\\d?\\d\\d)/', $input, $matches) > 0) {
         // A single bare year
         $year = $matches[1];
         $startDate = $year;
         $endDate = $year;
     } else {
         return null;
     }
     // Pad negative years to a minus sign followed by four digits
     if ($startDate < 0) {
         $startDate = '-' . substr('0000', 0, 5 - strlen($startDate)) . substr($startDate, 1);
     } elseif ($startDate == 0) {
         $startDate = '0000';
     }
     if ($endDate < 0) {
         $endDate = '-' . substr('0000', 0, 5 - strlen($endDate)) . substr($endDate, 1);
     } elseif ($endDate == 0) {
         $endDate = '0000';
     }
     // Pad short positive start years; a two-digit year is taken to be 19xx
     switch (strlen($startDate)) {
         case 1:
             $startDate = "000{$startDate}";
             break;
         case 2:
             $startDate = "19{$startDate}";
             break;
         case 3:
             $startDate = "0{$startDate}";
             break;
     }
     // Pad short end years; a two-digit year borrows the leading part of the
     // (already padded) start year
     switch (strlen($endDate)) {
         case 1:
             $endDate = "000{$endDate}";
             break;
         case 2:
             // Take into account possible negative sign
             $endDate = substr($startDate, 0, -2) . $endDate;
             break;
         case 3:
             $endDate = "0{$endDate}";
             break;
     }
     if ($imprecise) {
         // This is way arbitrary, so disabled for now..
         //$startDate -= 2;
         //$endDate += 2;
     }
     if (!$noprocess) {
         // Bare years: expand to the first and last second of the year
         $startDate = $startDate . '-01-01T00:00:00Z';
         $endDate = $endDate . '-12-31T23:59:59Z';
     }
     // Trying to index dates into the future? I don't think so...
     // NOTE(review): both operands are strings here and $startDate/$endDate
     // are full timestamps, so this is not a plain numeric year comparison -
     // confirm whether dates within the current year are meant to be rejected.
     $yearNow = date('Y');
     if ($startDate > $yearNow || $endDate > $yearNow) {
         return null;
     }
     // Length of the year part of each timestamp, including the leading minus
     // sign of a negative year, so that the fallbacks below keep the whole year
     $startYearLength = substr($startDate, 0, 1) === '-' ? 5 : 4;
     $endYearLength = substr($endDate, 0, 1) === '-' ? 5 : 4;
     $start = MetadataUtils::validateISO8601Date($startDate);
     $end = MetadataUtils::validateISO8601Date($endDate);
     if ($start === false || $end === false) {
         global $logger;
         $logger->log('NdlLidoRecord', "Invalid date range {$startDate} - {$endDate} parsed from " . "'{$input}', record {$this->source}." . $this->getID(), Logger::WARNING);
         // Salvage the range from whichever end is valid by using the whole
         // year of that end
         if ($start !== false) {
             $endDate = substr($startDate, 0, $startYearLength) . '-12-31T23:59:59Z';
         } elseif ($end !== false) {
             $startDate = substr($endDate, 0, $endYearLength) . '-01-01T00:00:00Z';
         } else {
             return null;
         }
     } elseif ($start > $end) {
         global $logger;
         $logger->log('NdlLidoRecord', "Invalid date range {$startDate} - {$endDate} parsed from '{$input}', " . "record {$this->source}." . $this->getID(), Logger::WARNING);
         // Reversed range: fall back to the end of the start year
         $endDate = substr($startDate, 0, $startYearLength) . '-12-31T23:59:59Z';
     }
     return [$startDate, $endDate];
 }
コード例 #15
0
 /**
  * Tests for createSortableString
  *
  * Each input is paired with the sort key it is expected to produce. Judging
  * by the expected values, digit runs get their length prefixed, letters are
  * upper-cased and repeated whitespace is collapsed.
  *
  * @return void
  */
 public function testCreateSortableString()
 {
     // Input string => expected sortable string
     $expectations = [
         'A 123' => 'A 3123',
         'A 123 8 abc' => 'A 3123 18 ABC',
         'A  1   2' => 'A 11 12',
     ];
     foreach ($expectations as $input => $sortKey) {
         $this->assertEquals($sortKey, MetadataUtils::createSortableString($input));
     }
 }
コード例 #16
0
ファイル: DcRecord.php プロジェクト: grharry/RecordManager
 /**
  * Collect the string contents of every element with the given tag name
  * directly under the record document, with trailing punctuation stripped.
  *
  * @param string $tag XML tag to get
  *
  * @return array One cleaned string per matching element, in document order
  */
 protected function getValues($tag)
 {
     $collected = [];
     $elements = $this->doc->{$tag};
     foreach ($elements as $element) {
         $text = (string) $element;
         $collected[] = MetadataUtils::stripTrailingPunctuation($text);
     }
     return $collected;
 }
コード例 #17
0
 /**
  * Return fields to be indexed in Solr (an alternative to an XSL transformation)
  *
  * Starts from the parent implementation's fields and adds the unit date range,
  * a year-range suffix on the title fields, source fields, a "digitized" format
  * and online flags when a daogrp element is present, plus identifier,
  * measurements, material, rights and usage rights taken from the did element.
  *
  * @param boolean $prependTitleWithSubtitle If true and title_sub differs from
  * title_short, title is formed by combining title_sub and title_short
  *
  * @return string[]
  */
 public function toSolrArray($prependTitleWithSubtitle)
 {
     $fields = parent::toSolrArray($prependTitleWithSubtitle);
     $record = $this->doc;

     // Unit date range in both numeric and string form
     $dateRange = $this->parseDateRange((string) $record->did->unitdate);
     $numericRange = MetadataUtils::dateRangeToNumeric($dateRange);
     $fields['unit_sdaterange'] = $numericRange;
     $fields['search_sdaterange_mv'] = $numericRange;
     $stringRange = MetadataUtils::dateRangeToStr($dateRange);
     $fields['unit_daterange'] = $stringRange;
     $fields['search_daterange_mv'] = $stringRange;

     if ($dateRange) {
         $fields['main_date_str'] = MetadataUtils::extractYear($dateRange[0]);
         $fields['main_date'] = $this->validateDate($dateRange[0]);

         // Append year range to title (only years, not the full dates).
         // -9999 and 9999 are treated as open ends and left out.
         $firstYear = MetadataUtils::extractYear($dateRange[0]);
         $lastYear = MetadataUtils::extractYear($dateRange[1]);
         $suffix = $firstYear != '-9999' ? $firstYear : '';
         if ($lastYear != $firstYear) {
             $suffix .= $lastYear != '9999' ? "-{$lastYear}" : '-';
         }
         if ($suffix) {
             $suffixLength = strlen($suffix);
             $titleFields = ['title_full', 'title_sort', 'title', 'title_short'];
             foreach ($titleFields as $name) {
                 // Skip titles that already end with the range, bare or
                 // wrapped in parentheses
                 if (substr($fields[$name], -$suffixLength) == $suffix
                     || substr($fields[$name], -$suffixLength - 2) == "({$suffix})"
                 ) {
                     continue;
                 }
                 $fields[$name] .= " ({$suffix})";
             }
         }
     }

     // Single-valued sequence for sorting
     if (isset($fields['hierarchy_sequence'])) {
         $fields['hierarchy_sequence_str'] = $fields['hierarchy_sequence'];
     }

     // Prefer the institution as the source when one is available
     if (isset($fields['institution'])) {
         $fields['source_str_mv'] = $fields['institution'];
     } else {
         $fields['source_str_mv'] = $this->source;
     }
     $fields['datasource_str_mv'] = $this->source;

     // Digitized?
     if ($record->did->daogrp) {
         $plainFormats = ['collection', 'series', 'fonds', 'item'];
         if (in_array($fields['format'], $plainFormats)) {
             $fields['format'] = 'digitized_' . $fields['format'];
         }
         if ($this->doc->did->daogrp->daoloc) {
             foreach ($this->doc->did->daogrp->daoloc as $location) {
                 if (!$location->attributes()->{'href'}) {
                     continue;
                 }
                 $fields['online_boolean'] = true;
                 // This is sort of special. Make sure to use source instead
                 // of datasource.
                 $fields['online_str_mv'] = $fields['source_str_mv'];
                 // The first location with a link is enough
                 break;
             }
         }
     }

     // Simple one-to-one copies from children of the did element
     // (measurements are for display)
     $directCopies = [
         'identifier' => 'unitid',
         'measurements' => 'dimensions',
         'material' => 'physdesc',
     ];
     foreach ($directCopies as $fieldName => $elementName) {
         if (isset($record->did->{$elementName})) {
             $fields[$fieldName] = (string) $record->did->{$elementName};
         }
     }
     if (isset($record->did->accessrestrict->p)) {
         $fields['rights'] = (string) $record->did->accessrestrict->p;
     }

     // Usage rights
     $usageRights = $this->getUsageRights();
     if ($usageRights) {
         $fields['usage_rights_str_mv'] = $usageRights;
     }
     return $fields;
 }