/**
 * Build the Solr field array for this record.
 *
 * Starts from the parent's fields and then adds: primary/secondary authors
 * taken from the 'author' values, publication date fields, online link
 * information collected from <relation> and <file> elements, a thumbnail
 * URL, the permanent address and the source identifiers.
 *
 * @return array Solr field name => value(s)
 */
public function toSolrArray()
{
    $data = parent::toSolrArray();

    // Nonstandard author fields: the first value becomes the main author,
    // the remaining ones are placed in front of any existing secondary authors
    $authorValues = $this->getValues('author');
    if ($authorValues) {
        $data['author'] = array_shift($authorValues);
        $data['author2'] = isset($data['author2'])
            ? array_merge($authorValues, $data['author2'])
            : $authorValues;
    }

    if (isset($data['publishDate'])) {
        $data['main_date_str']
            = MetadataUtils::extractYear($data['publishDate']);
        $data['main_date'] = $this->validateDate(
            $this->getPublicationYear() . '-01-01T00:00:00Z'
        );
    }

    $publicationRange = $this->getPublicationDateRange();
    if ($publicationRange) {
        $numericRange = MetadataUtils::dateRangeToNumeric($publicationRange);
        $data['publication_sdaterange'] = $numericRange;
        $data['search_sdaterange_mv'][] = $numericRange;

        $stringRange = MetadataUtils::dateRangeToStr($publicationRange);
        $data['publication_daterange'] = $stringRange;
        $data['search_daterange_mv'][] = $stringRange;
    }

    // Links found in <relation> elements
    foreach ($this->doc->relation as $relationNode) {
        $candidate = (string) $relationNode;
        // Accept only values that are not overly long and that contain
        // either a dot surrounded by valid characters or a familiar scheme
        $acceptable = strlen($candidate) <= 4096
            && (preg_match('/[A-Za-z0-9]\\.[A-Za-z0-9]/', $candidate)
            || preg_match('/^(http|ftp)s?:\\/\\//', $candidate));
        if ($acceptable) {
            $data['online_boolean'] = true;
            $data['online_str_mv'] = $this->source;
            $data['online_urls_str_mv'][] = json_encode(
                ['url' => $candidate, 'text' => '', 'source' => $this->source]
            );
        }
    }

    // Links found in <file> elements; the href attribute wins over the
    // element content when it is non-empty
    foreach ($this->doc->file as $fileNode) {
        $href = (string) $fileNode->attributes()->href;
        $fileUrl = $href ? $href : (string) $fileNode;

        $data['online_boolean'] = true;
        $data['online_str_mv'] = $this->source;
        $data['online_urls_str_mv'][] = json_encode(
            [
                'url' => $fileUrl,
                'text' => (string) $fileNode->attributes()->name,
                'source' => $this->source
            ]
        );

        // The first file whose bundle is THUMBNAIL (case-insensitive)
        // provides the thumbnail
        $isThumbnail
            = strcasecmp($fileNode->attributes()->bundle, 'THUMBNAIL') == 0;
        if ($isThumbnail && !isset($data['thumbnail'])) {
            $data['thumbnail'] = $fileUrl;
        }
    }

    if ($this->doc->permaddress) {
        $data['url'] = (string) $this->doc->permaddress[0];
    }

    $data['source_str_mv'] = $this->source;
    $data['datasource_str_mv'] = $this->source;

    return $data;
}
/**
 * Return fields to be indexed in Solr (an alternative to an XSL transformation)
 *
 * Extends the parent's fields with publication date fields, a single
 * language code and the source identifiers.
 *
 * @return array Solr field name => value(s)
 */
public function toSolrArray()
{
    $data = parent::toSolrArray();

    if (isset($data['publishDate'])) {
        $data['main_date_str']
            = MetadataUtils::extractYear($data['publishDate']);
        $data['main_date'] = $this->validateDate(
            $this->getPublicationYear() . '-01-01T00:00:00Z'
        );
    }

    // The class name is spelled with its canonical casing here. PHP resolves
    // class names case-insensitively only once the class is loaded; a
    // lower-cased reference may not be found by a case-sensitive autoloader.
    if ($range = $this->getPublicationDateRange()) {
        $data['search_sdaterange_mv'][] = $data['publication_sdaterange']
            = MetadataUtils::dateRangeToNumeric($range);
        $data['search_daterange_mv'][] = $data['publication_daterange']
            = MetadataUtils::dateRangeToStr($range);
    }

    // language, take only first. The language element is split on spaces and
    // only tokens of two or three lower-case letters are kept, excluding the
    // codes 'zxx' and 'und'.
    $languages = array_filter(
        explode(' ', (string) $this->doc->language),
        function ($value) {
            return preg_match('/^[a-z]{2,3}$/', $value)
                && $value != 'zxx'
                && $value != 'und';
        }
    );
    // array_shift() returns null when no token qualified, so the field is
    // then set to null.
    $data['language'] = array_shift($languages);

    $data['source_str_mv'] = $this->source;
    $data['datasource_str_mv'] = $this->source;

    return $data;
}
/**
 * Return fields to be indexed in Solr (an alternative to an XSL transformation)
 *
 * Extends the parent's fields with additional author, title, date,
 * coordinate, classification, ISSN/ISMN, URL, holdings, category and call
 * number fields read from the MARC record.
 *
 * @return array Solr field name => value(s)
 */
public function toSolrArray()
{
    $data = parent::toSolrArray();

    // Fall back to the 110$a value when the parent found no main author
    if (empty($data['author'])) {
        $data['author'] = $data['author_fuller'] = $data['author-letter']
            = $this->getFieldSubfields('110', ['a' => 1]);
    }

    // Do not repeat the main author among the secondary authors
    $key = array_search($data['author'], $data['author2']);
    if ($key !== false) {
        unset($data['author2'][$key]);
    }

    if (isset($data['publishDate'])) {
        $data['main_date_str']
            = MetadataUtils::extractYear($data['publishDate'][0]);
        $data['main_date']
            = $this->validateDate($data['main_date_str'] . '-01-01T00:00:00Z');
    }
    if ($range = $this->getPublicationDateRange()) {
        $data['search_sdaterange_mv'][] = $data['publication_sdaterange']
            = MetadataUtils::dateRangeToNumeric($range);
        $data['search_daterange_mv'][] = $data['publication_daterange']
            = MetadataUtils::dateRangeToStr($range);
    }

    $data['publication_place_txt_mv'] = MetadataUtils::arrayTrim(
        $this->getFieldsSubfields(
            [
                [MarcRecord::GET_NORMAL, '260', ['a' => 1]]
            ]
        ),
        ' []'
    );

    $data['subtitle_lng_str_mv'] = $this->getFieldsSubfields(
        [
            [MarcRecord::GET_NORMAL, '041', ['j' => 1]],
            [MarcRecord::GET_NORMAL, '979', ['j' => 1]]
        ],
        false,
        true,
        true
    );

    $data['original_lng_str_mv'] = $this->getFieldsSubfields(
        [
            [MarcRecord::GET_NORMAL, '041', ['h' => 1]],
            [MarcRecord::GET_NORMAL, '979', ['i' => 1]]
        ],
        false,
        true,
        true
    );

    // 979cd = component part authors
    // 900, 910, 911 = Finnish reference field
    $extraAuthors = $this->getFieldsSubfields(
        [
            [MarcRecord::GET_BOTH, '979', ['c' => 1]],
            [MarcRecord::GET_BOTH, '979', ['d' => 1]],
            [MarcRecord::GET_BOTH, '900', ['a' => 1]],
            [MarcRecord::GET_BOTH, '910', ['a' => 1, 'b' => 1]],
            [MarcRecord::GET_BOTH, '911', ['a' => 1, 'e' => 1]]
        ],
        false,
        true,
        true
    );
    foreach ($extraAuthors as $field) {
        $data['author2'][] = $field;
    }
    $key = array_search($data['author'], $data['author2']);
    if ($key !== false) {
        unset($data['author2'][$key]);
    }
    // array_filter() preserves keys, so reindex only after filtering to get a
    // gap-free list (the previous order, array_filter(array_values(...)),
    // could leave holes in the keys when empty entries were dropped).
    $data['author2'] = array_values(array_filter($data['author2']));

    $data['title_alt'] = array_values(
        array_unique(
            $this->getFieldsSubfields(
                [
                    [MarcRecord::GET_ALT, '245', ['a' => 1, 'b' => 1]],
                    [
                        MarcRecord::GET_BOTH, '130',
                        [
                            'a' => 1, 'd' => 1, 'f' => 1, 'g' => 1, 'h' => 1,
                            'k' => 1, 'l' => 1, 'n' => 1, 'p' => 1, 'r' => 1,
                            's' => 1, 't' => 1
                        ]
                    ],
                    [
                        MarcRecord::GET_BOTH, '240',
                        [
                            'a' => 1, 'd' => 1, 'f' => 1, 'g' => 1, 'k' => 1,
                            'l' => 1, 'n' => 1, 'p' => 1, 'r' => 1, 's' => 1
                        ]
                    ],
                    [
                        MarcRecord::GET_BOTH, '243',
                        [
                            'a' => 1, 'd' => 1, 'f' => 1, 'g' => 1, 'h' => 1,
                            'k' => 1, 'l' => 1, 'm' => 1, 'n' => 1, 'o' => 1,
                            'p' => 1, 'r' => 1, 's' => 1
                        ]
                    ],
                    [
                        MarcRecord::GET_BOTH, '246',
                        ['a' => 1, 'b' => 1, 'g' => 1]
                    ],
                    [
                        MarcRecord::GET_BOTH, '700',
                        [
                            't' => 1, 'm' => 1, 'r' => 1, 'h' => 1, 'i' => 1,
                            'g' => 1, 'n' => 1, 'p' => 1, 's' => 1, 'l' => 1,
                            'o' => 1, 'k' => 1
                        ],
                        ['t' => 1]
                    ],
                    [
                        MarcRecord::GET_BOTH, '730',
                        [
                            'a' => 1, 'd' => 1, 'f' => 1, 'g' => 1, 'h' => 1,
                            'i' => 1, 'k' => 1, 'l' => 1, 'm' => 1, 'n' => 1,
                            'o' => 1, 'p' => 1, 'r' => 1, 's' => 1, 't' => 1
                        ]
                    ],
                    [MarcRecord::GET_BOTH, '740', ['a' => 1]],
                    [MarcRecord::GET_BOTH, '979', ['b' => 1]],
                    [MarcRecord::GET_BOTH, '979', ['e' => 1]],
                    [MarcRecord::GET_BOTH, '940', ['a' => 1]]
                ]
            )
        )
    );

    // Location coordinates
    $field = $this->getField('034');
    if ($field) {
        $westOrig = $this->getSubfield($field, 'd');
        $eastOrig = $this->getSubfield($field, 'e');
        $northOrig = $this->getSubfield($field, 'f');
        $southOrig = $this->getSubfield($field, 'g');
        $west = MetadataUtils::coordinateToDecimal($westOrig);
        $east = MetadataUtils::coordinateToDecimal($eastOrig);
        $north = MetadataUtils::coordinateToDecimal($northOrig);
        $south = MetadataUtils::coordinateToDecimal($southOrig);

        if (!is_nan($west) && !is_nan($north)) {
            if ($west < -180 || $west > 180 || ($north < -90 || $north > 90)) {
                global $logger;
                $logger->log(
                    'NdlMarcRecord',
                    "Discarding invalid coordinates {$west},{$north} decoded from "
                    . "w={$westOrig}, e={$eastOrig}, n={$northOrig}, s={$southOrig}, "
                    . "record {$this->source}." . $this->getID(),
                    Logger::WARNING
                );
            } else {
                if (!is_nan($east) && !is_nan($south)) {
                    if ($east < -180 || $east > 180
                        || $south < -90 || $south > 90
                    ) {
                        global $logger;
                        $logger->log(
                            'NdlMarcRecord',
                            "Discarding invalid coordinates {$east},{$south} "
                            . "decoded from w={$westOrig}, e={$eastOrig}, "
                            . "n={$northOrig}, s={$southOrig}, record "
                            . "{$this->source}." . $this->getID(),
                            Logger::WARNING
                        );
                    } else {
                        // Try to cope with weird coordinate order. After
                        // these swaps $north holds the smaller and $south the
                        // larger latitude, and $west <= $east, so the
                        // envelope below is emitted as
                        // (min x, max x, max y, min y).
                        if ($north > $south) {
                            list($north, $south) = [$south, $north];
                        }
                        if ($west > $east) {
                            list($west, $east) = [$east, $west];
                        }
                        $data['location_geo']
                            = "ENVELOPE({$west}, {$east}, {$south}, {$north})";
                    }
                } else {
                    // Only one corner is usable: index it as a point
                    $data['location_geo'] = "POINT({$west} {$north})";
                }
            }
        }
    }

    // Classifications
    foreach ($this->getFields('080') as $field080) {
        $classification = trim($this->getSubfield($field080, 'a'));
        $classification .= trim($this->getSubfield($field080, 'b'));
        if ($classification) {
            $aux = $this->getSubfields($field080, ['x' => 1]);
            if ($aux) {
                $classification .= " {$aux}";
            }
            $data['classification_txt_mv'][] = "udk {$classification}";
        }
    }
    $dlc = $this->getFieldsSubfields(
        [
            [MarcRecord::GET_NORMAL, '050', ['a' => 1, 'b' => 1]]
        ]
    );
    foreach ($dlc as $classification) {
        $data['classification_txt_mv'][] = 'dlc '
            . mb_strtolower(str_replace(' ', '', $classification), 'UTF-8');
    }
    foreach ($this->getFields('084') as $field) {
        $source = $this->getSubfield($field, '2');
        $classification = $this->getSubfields($field, ['a' => 1, 'b' => 1]);
        // Entries without a source code in subfield 2 are skipped
        if ($source) {
            $data['classification_txt_mv'][] = "{$source} "
                . mb_strtolower(str_replace(' ', '', $classification), 'UTF-8');
        }
    }
    if (isset($data['classification_txt_mv'])) {
        $data['allfields'] = array_merge(
            $data['allfields'],
            $data['classification_txt_mv']
        );
    }

    // Keep classification_str_mv for backward-compatibility for now
    if (isset($data['classification_txt_mv'])) {
        $data['classification_str_mv'] = $data['classification_txt_mv'];
    }

    // Ebrary location: 035$a values of the form "ebr" + number
    $ebraryLocs = $this->getFieldsSubfields(
        [
            [MarcRecord::GET_NORMAL, '035', ['a' => 1]]
        ]
    );
    foreach ($ebraryLocs as $field) {
        if (strncmp($field, 'ebr', 3) == 0 && is_numeric(substr($field, 3))) {
            if (!isset($data['building'])
                || !in_array('EbraryDynamic', $data['building'])
            ) {
                $data['building'][] = 'EbraryDynamic';
            }
        }
    }

    // Topics
    if (strncmp($this->source, 'metalib', 7) == 0) {
        $field653 = $this->getFieldsSubfields(
            [
                [MarcRecord::GET_BOTH, '653', ['a' => 1]]
            ]
        );
        $data['topic'] = array_merge($data['topic'], $field653);
        $data['topic_facet'] = array_merge($data['topic_facet'], $field653);
    }

    // Original Study Number
    $data['ctrlnum'] = array_merge(
        $data['ctrlnum'],
        $this->getFieldsSubfields(
            [
                [MarcRecord::GET_NORMAL, '036', ['a' => 1]]
            ]
        )
    );

    // Source
    $data['source_str_mv'] = $this->source;
    $data['datasource_str_mv'] = [$this->source];

    // ISSN. Hyphens are stripped with str_replace() applied to the whole
    // array instead of by-reference foreach loops, which left a dangling
    // reference to the last element behind.
    $data['issn'] = str_replace(
        '-',
        '',
        $this->getFieldsSubfields(
            [
                [MarcRecord::GET_NORMAL, '022', ['a' => 1]]
            ]
        )
    );
    $data['other_issn_str_mv'] = str_replace(
        '-',
        '',
        $this->getFieldsSubfields(
            [
                [MarcRecord::GET_NORMAL, '440', ['x' => 1]],
                [MarcRecord::GET_NORMAL, '480', ['x' => 1]],
                [MarcRecord::GET_NORMAL, '730', ['x' => 1]],
                [MarcRecord::GET_NORMAL, '776', ['x' => 1]]
            ]
        )
    );
    $data['linking_issn_str_mv'] = str_replace(
        '-',
        '',
        $this->getFieldsSubfields(
            [
                [MarcRecord::GET_NORMAL, '022', ['l' => 1]]
            ]
        )
    );

    // URLs
    $fields = $this->getFields('856');
    foreach ($fields as $field) {
        $ind2 = $this->getIndicator($field, 2);
        $sub3 = $this->getSubfield($field, 3);
        if (($ind2 == '0' || $ind2 == '1') && !$sub3) {
            $url = trim($this->getSubfield($field, 'u'));
            if (!$url) {
                continue;
            }
            // Require at least one dot surrounded by valid characters or a
            // familiar scheme
            if (!preg_match('/[A-Za-z0-9]\\.[A-Za-z0-9]/', $url)
                && !preg_match('/^(http|ftp)s?:\\/\\//', $url)
            ) {
                continue;
            }
            $data['online_boolean'] = true;
            $data['online_str_mv'] = $this->source;
            // Link text comes from subfield y, falling back to subfield z
            $linkText = $this->getSubfield($field, 'y');
            if (!$linkText) {
                $linkText = $this->getSubfield($field, 'z');
            }
            $link = [
                'url' => $this->getSubfield($field, 'u'),
                'text' => $linkText,
                'source' => $this->source
            ];
            $data['online_urls_str_mv'][] = json_encode($link);
        }
    }

    // Holdings: each entry gets the source appended after a space
    $data['holdings_txtP_mv'] = $this->getFieldsSubfields(
        [
            [
                MarcRecord::GET_NORMAL, '852',
                ['a' => 1, 'b' => 1, 'h' => 1, 'z' => 1]
            ]
        ]
    );
    if (!empty($data['holdings_txtP_mv'])) {
        $updateFunc = function (&$val, $k, $source) {
            $val .= " {$source}";
        };
        array_walk($data['holdings_txtP_mv'], $updateFunc, $this->source);
    }

    // Access restrictions
    if ($restrictions = $this->getAccessRestrictions()) {
        $data['restricted_str'] = $restrictions;
    }

    // ISMN: 024 fields with first indicator 2
    foreach ($this->getFields('024') as $field024) {
        if ($this->getIndicator($field024, 1) == '2') {
            $ismn = $this->getSubfield($field024, 'a');
            $ismn = str_replace('-', '', $ismn);
            if (!preg_match('{([0-9]{13})}', $ismn, $matches)) {
                continue;
            }
            // Append instead of assigning so that every ISMN of the record
            // is kept in this multi-valued field, not just the last one.
            $data['ismn_isn_mv'][] = $matches[1];
        }
    }

    // Project ID in 960 (Fennica)
    if ($this->getDriverParam('projectIdIn960', false)) {
        $data['project_id_str_mv'] = $this->getFieldsSubfields(
            [
                [MarcRecord::GET_NORMAL, '960', ['a' => 1]]
            ]
        );
    }

    // Hierarchical Categories (MetaLib)
    foreach ($this->getFields('976') as $field976) {
        $category = $this->getSubfield($field976, 'a');
        $category = trim(
            str_replace(['/', '\\'], '', $category),
            " -\t\n\r\v"
        );
        if (!$category) {
            continue;
        }
        $sub = $this->getSubfield($field976, 'b');
        $sub = trim(str_replace(['/', '\\'], '', $sub), " -\t\n\r\v");
        if ($sub) {
            $category .= "/{$sub}";
        }
        $data['category_str_mv'][] = $category;
    }

    // Hierarchical categories (e.g. SFX)
    if ($this->getDriverParam('categoriesIn650', false)) {
        foreach ($this->getFields('650') as $field650) {
            $category = $this->getSubfield($field650, 'a');
            $category = trim(str_replace(['/', '\\'], '', $category));
            if (!$category) {
                continue;
            }
            $sub = $this->getSubfield($field650, 'x');
            $sub = trim(str_replace(['/', '\\'], '', $sub));
            if ($sub) {
                $category .= "/{$sub}";
            }
            $data['category_str_mv'][] = $category;
        }
    }

    // Call numbers
    $data['callnumber-first'] = strtoupper(
        str_replace(
            ' ',
            '',
            $this->getFirstFieldSubfields(
                [
                    [MarcRecord::GET_NORMAL, '080', ['a' => 1, 'b' => 1]],
                    [MarcRecord::GET_NORMAL, '084', ['a' => 1, 'b' => 1]],
                    [MarcRecord::GET_NORMAL, '050', ['a' => 1, 'b' => 1]]
                ]
            )
        )
    );
    $data['callnumber-raw'] = array_map(
        'strtoupper',
        $this->getFieldsSubfields(
            [
                [MarcRecord::GET_NORMAL, '080', ['a' => 1, 'b' => 1]],
                [MarcRecord::GET_NORMAL, '084', ['a' => 1, 'b' => 1]],
                [MarcRecord::GET_NORMAL, '050', ['a' => 1, 'b' => 1]]
            ]
        )
    );
    $data['callnumber-sort'] = empty($data['callnumber-raw'])
        ? '' : $data['callnumber-raw'][0];

    // Legacy callnumber fields. TODO: Remove when VuFind 1 is gone.
    $data['callnumber'] = strtoupper(
        str_replace(
            ' ',
            '',
            $this->getFirstFieldSubfields(
                [
                    [MarcRecord::GET_NORMAL, '080', ['a' => 1, 'b' => 1]],
                    [MarcRecord::GET_NORMAL, '084', ['a' => 1, 'b' => 1]],
                    [MarcRecord::GET_NORMAL, '050', ['a' => 1, 'b' => 1]]
                ]
            )
        )
    );
    $data['callnumber-a'] = $this->getFirstFieldSubfields(
        [
            [MarcRecord::GET_NORMAL, '080', ['a' => 1]],
            [MarcRecord::GET_NORMAL, '084', ['a' => 1]],
            [MarcRecord::GET_NORMAL, '050', ['a' => 1]]
        ]
    );
    $data['callnumber-first-code'] = substr($data['callnumber-a'], 0, 1);

    return $data;
}
/**
 * Return fields to be indexed in Solr (an alternative to an XSL transformation)
 *
 * Extends the parent's fields with institution/building handling, split
 * topics, actor roles, classifications, exhibition names, date ranges for
 * the different event types, online status, coordinates and usage rights.
 *
 * @return array Solr field name => value(s)
 */
public function toSolrArray()
{
    $data = parent::toSolrArray();

    // Read the institution defensively so that a missing key does not raise
    // an undefined index notice.
    $institution = isset($data['institution']) ? $data['institution'] : '';

    // Kantapuu oai provides just the consortium name as the legal body name,
    // so getting the actual institution name from the rightsholder information
    if (in_array($institution, ['Kantapuu', 'Akseli'], true)) {
        $institution = $data['institution']
            = $this->getRightsHolderLegalBodyName();
    }

    // Handle sources that contain multiple organisations properly
    if ($this->getDriverParam('institutionInBuilding', false)) {
        // reset() takes its argument by reference, so it must not be fed the
        // result of explode() directly; use a temporary and index 0 instead.
        $institutionParts = explode('/', (string) $institution);
        $data['building'] = $institutionParts[0];
    }
    if (!empty($data['collection'])
        && $this->getDriverParam('collectionInBuilding', false)
    ) {
        if (isset($data['building']) && $data['building']) {
            $data['building'] .= '/' . $data['collection'];
        } else {
            $data['building'] = $data['collection'];
        }
    }

    // REMOVE THIS ONCE TUUSULA IS FIXED
    // sometimes there are multiple subjects in one element
    // separated with commas like "foo, bar, baz" (Tuusula)
    $topic = [];
    if (isset($data['topic']) && is_array($data['topic'])) {
        foreach ($data['topic'] as $subject) {
            $exploded = explode(',', $subject);
            foreach ($exploded as $explodedSubject) {
                $topic[] = trim($explodedSubject);
            }
        }
    }
    $data['topic'] = $data['topic_facet'] = $topic;
    // END OF TUUSULA FIX

    $data['artist_str_mv'] = $this->getActor('valmistus', 'taiteilija');
    $data['photographer_str_mv'] = $this->getActor('valmistus', 'valokuvaaja');
    $data['finder_str_mv'] = $this->getActor('löytyminen', 'löytäjä');
    $data['manufacturer_str_mv'] = $this->getActor('valmistus', 'valmistaja');
    $data['designer_str_mv'] = $this->getActor('suunnittelu', 'suunnittelija');

    // Keep classification_str_mv for backward-compatibility for now
    $data['classification_txt_mv'] = $data['classification_str_mv']
        = $this->getClassifications();
    $data['exhibition_str_mv'] = $this->getEventNames('näyttely');

    // Subject date ranges: the first one also provides the main date unless
    // one has already been set
    foreach ($this->getSubjectDateRanges() as $range) {
        if (!isset($data['main_date_str'])) {
            $data['main_date_str'] = MetadataUtils::extractYear($range[0]);
            $data['main_date'] = $this->validateDate($range[0]);
        }
        $data['search_sdaterange_mv'][]
            = MetadataUtils::dateRangeToNumeric($range);
        $data['search_daterange_mv'][]
            = MetadataUtils::dateRangeToStr($range);
    }

    $daterange = $this->getDateRange('valmistus');
    if ($daterange) {
        if (!isset($data['main_date_str'])) {
            $data['main_date_str']
                = MetadataUtils::extractYear($daterange[0]);
            $data['main_date'] = $this->validateDate($daterange[0]);
        }
        $data['search_sdaterange_mv'][] = $data['creation_sdaterange']
            = MetadataUtils::dateRangeToNumeric($daterange);
        $data['search_daterange_mv'][] = $data['creation_daterange']
            = MetadataUtils::dateRangeToStr($daterange);
    } else {
        // No 'valmistus' date: try the other event types. Each found range is
        // stored in its own field; the search range fields only receive a
        // value when they are still unset.
        $dateSources = [
            'suunnittelu' => 'design',
            'tuotanto' => 'production',
            'kuvaus' => 'photography'
        ];
        foreach ($dateSources as $dateSource => $field) {
            $daterange = $this->getDateRange($dateSource);
            if ($daterange) {
                $data[$field . '_sdaterange']
                    = MetadataUtils::dateRangeToNumeric($daterange);
                $data[$field . '_daterange']
                    = MetadataUtils::dateRangeToStr($daterange);
                if (!isset($data['search_sdaterange_mv'])) {
                    $data['search_sdaterange_mv'][]
                        = $data[$field . '_sdaterange'];
                }
                if (!isset($data['search_daterange_mv'])) {
                    $data['search_daterange_mv'][]
                        = $data[$field . '_daterange'];
                }
                if (!isset($data['main_date_str'])) {
                    $data['main_date_str']
                        = MetadataUtils::extractYear($daterange[0]);
                    $data['main_date'] = $this->validateDate($daterange[0]);
                }
            }
        }
    }

    if ($range = $this->getDateRange('käyttö')) {
        $data['use_sdaterange'] = MetadataUtils::dateRangeToNumeric($range);
        $data['use_daterange'] = MetadataUtils::dateRangeToStr($range);
    }
    if ($range = $this->getDateRange('löytyminen')) {
        $data['finding_sdaterange'] = MetadataUtils::dateRangeToNumeric($range);
        $data['finding_daterange'] = MetadataUtils::dateRangeToStr($range);
    }

    $data['source_str_mv'] = $this->source;
    $data['datasource_str_mv'] = $this->source;

    if ($this->getURLs()) {
        $data['online_boolean'] = true;
        $data['online_str_mv'] = $this->source;
    }

    $data['location_geo'] = $this->getEventPlaceCoordinates();

    // Usage rights
    if ($rights = $this->getUsageRights()) {
        $data['usage_rights_str_mv'] = $rights;
    }

    // Add the record source organization to the all-fields data. This used
    // to be appended to an unused local variable and was therefore lost.
    // NOTE(review): assumes $data['allfields'] is an array (or unset) as
    // produced by the parent - confirm.
    $organization = $this->getRecordSourceOrganization();
    if ($organization) {
        $data['allfields'][] = $organization;
    }

    return $data;
}
/**
 * Build the Solr field array for this record.
 *
 * Adds unit date ranges, a year range suffix for the title fields, source
 * identifiers, digitized/online status and a few descriptive fields on top
 * of what the parent produces.
 *
 * @param bool $prependTitleWithSubtitle If true and title_sub differs from
 * title_short, title is formed by combining title_sub and title_short
 * (passed through to the parent implementation)
 *
 * @return array Solr field name => value(s)
 */
public function toSolrArray($prependTitleWithSubtitle)
{
    $data = parent::toSolrArray($prependTitleWithSubtitle);
    $doc = $this->doc;

    $unitDates = $this->parseDateRange((string) $doc->did->unitdate);

    $numericRange = MetadataUtils::dateRangeToNumeric($unitDates);
    $data['unit_sdaterange'] = $numericRange;
    $data['search_sdaterange_mv'] = $numericRange;

    $stringRange = MetadataUtils::dateRangeToStr($unitDates);
    $data['unit_daterange'] = $stringRange;
    $data['search_daterange_mv'] = $stringRange;

    if ($unitDates) {
        $data['main_date_str'] = MetadataUtils::extractYear($unitDates[0]);
        $data['main_date'] = $this->validateDate($unitDates[0]);

        // Append year range to title (only years, not the full dates)
        $firstYear = MetadataUtils::extractYear($unitDates[0]);
        $lastYear = MetadataUtils::extractYear($unitDates[1]);

        // The sentinel years -9999 and 9999 are left out of the suffix
        $suffix = $firstYear != '-9999' ? $firstYear : '';
        if ($lastYear != $firstYear) {
            $suffix .= '-';
            if ($lastYear != '9999') {
                $suffix .= $lastYear;
            }
        }

        if ($suffix) {
            $suffixLength = strlen($suffix);
            $titleFields = ['title_full', 'title_sort', 'title', 'title_short'];
            foreach ($titleFields as $titleField) {
                // Leave the title alone when it already ends with the range,
                // either bare or wrapped in parentheses
                if (substr($data[$titleField], -$suffixLength) == $suffix
                    || substr($data[$titleField], -$suffixLength - 2)
                    == "({$suffix})"
                ) {
                    continue;
                }
                $data[$titleField] .= " ({$suffix})";
            }
        }
    }

    // Single-valued sequence for sorting
    if (isset($data['hierarchy_sequence'])) {
        $data['hierarchy_sequence_str'] = $data['hierarchy_sequence'];
    }

    if (isset($data['institution'])) {
        $data['source_str_mv'] = $data['institution'];
    } else {
        $data['source_str_mv'] = $this->source;
    }
    $data['datasource_str_mv'] = $this->source;

    // Digitized?
    if ($doc->did->daogrp) {
        $digitizableFormats = ['collection', 'series', 'fonds', 'item'];
        if (in_array($data['format'], $digitizableFormats)) {
            $data['format'] = 'digitized_' . $data['format'];
        }
        if ($doc->did->daogrp->daoloc) {
            foreach ($doc->did->daogrp->daoloc as $location) {
                if ($location->attributes()->href) {
                    $data['online_boolean'] = true;
                    // This is sort of special. Make sure to use source
                    // instead of datasource.
                    $data['online_str_mv'] = $data['source_str_mv'];
                    break;
                }
            }
        }
    }

    // Plain descriptive elements copied as strings when present
    // (dimensions are the display measurements)
    $elementToField = [
        'unitid' => 'identifier',
        'dimensions' => 'measurements',
        'physdesc' => 'material'
    ];
    foreach ($elementToField as $element => $solrField) {
        if (isset($doc->did->{$element})) {
            $data[$solrField] = (string) $doc->did->{$element};
        }
    }
    if (isset($doc->did->accessrestrict->p)) {
        $data['rights'] = (string) $doc->did->accessrestrict->p;
    }

    // Usage rights
    $usageRights = $this->getUsageRights();
    if ($usageRights) {
        $data['usage_rights_str_mv'] = $usageRights;
    }

    return $data;
}