/**
 * Return fields to be indexed in Solr (an alternative to an XSL transformation)
 *
 * Builds on the array returned by parent::toSolrArray() and fills in the
 * MARC-derived fields (building, coordinates, identifiers, authors, titles,
 * publication data, call numbers, subjects, URLs, ...).
 *
 * Note: before anything else, subfield w of the linking entry fields
 * (760-787) is prefixed with "{idPrefix}." directly in $this->fields, i.e.
 * the record itself is modified, and calling this method again prefixes the
 * already prefixed values once more.
 *
 * @return array Associative array keyed by Solr field name; values are
 * strings or lists of strings depending on the field
 */
public function toSolrArray()
{
    // Add source prefix to IDs in link fields
    $linkFields = [
        '760', '762', '765', '767', '770', '772', '773', '774',
        '775', '776', '777', '780', '785', '786', '787'
    ];
    foreach ($linkFields as $code) {
        if (!isset($this->fields[$code])) {
            continue;
        }
        foreach ($this->fields[$code] as &$marcfield) {
            if (!isset($marcfield['s'])) {
                continue;
            }
            foreach ($marcfield['s'] as &$marcsubfield) {
                // Strict comparison: numeric subfield codes are integer array
                // keys, and 0 == 'w' is true before PHP 8.
                if (key($marcsubfield) === 'w') {
                    $marcsubfield['w']
                        = $this->idPrefix . '.' . $marcsubfield['w'];
                }
            }
            // Break the reference so later writes cannot hit the last element
            unset($marcsubfield);
        }
        unset($marcfield);
    }

    $data = parent::toSolrArray();

    // building
    $data['building'] = [];
    if ($this->getDriverParam('holdingsInBuilding', true)) {
        foreach ($this->getFields('852') as $field) {
            $location = $this->getSubfield($field, 'b');
            if ($location) {
                $data['building'][] = $location;
            }
        }
    }

    // long_lat
    $field = $this->getField('034');
    if ($field) {
        $westOrig = $this->getSubfield($field, 'd');
        $eastOrig = $this->getSubfield($field, 'e');
        $northOrig = $this->getSubfield($field, 'f');
        $southOrig = $this->getSubfield($field, 'g');
        $west = MetadataUtils::coordinateToDecimal($westOrig);
        $east = MetadataUtils::coordinateToDecimal($eastOrig);
        $north = MetadataUtils::coordinateToDecimal($northOrig);
        $south = MetadataUtils::coordinateToDecimal($southOrig);

        // West and north are mandatory; east/south, when present, turn the
        // point into the centre of the bounding box.
        if (!is_nan($west) && !is_nan($north)) {
            $longitude = !is_nan($east) ? ($west + $east) / 2 : $west;
            $latitude = !is_nan($south) ? ($north + $south) / 2 : $north;

            if ($longitude < -180 || $longitude > 180
                || $latitude < -90 || $latitude > 90
            ) {
                global $logger;
                $logger->log(
                    'MarcRecord',
                    "Discarding invalid coordinates {$longitude},{$latitude} "
                    . "decoded from w={$westOrig}, e={$eastOrig}, n={$northOrig}, "
                    . "s={$southOrig}, record {$this->source}."
                    . $this->getID(),
                    Logger::WARNING
                );
            } else {
                $data['long_lat'] = "{$longitude},{$latitude}";
            }
        }
    }

    // lccn
    $data['lccn'] = $this->getFieldSubfields('010', ['a' => 1]);
    $data['ctrlnum'] = $this->getFieldsSubfields(
        [
            [MarcRecord::GET_NORMAL, '035', ['a' => 1]]
        ]
    );
    $data['fullrecord'] = $this->toISO2709();
    if (!$data['fullrecord']) {
        // In case the record exceeds 99999 bytes...
        $data['fullrecord'] = $this->toXML();
    }
    $data['allfields'] = $this->getAllFields();

    // language: only three-character codes, skipping "no linguistic content"
    // (zxx) and "undetermined" (und)
    $languages = $this->getLanguages();
    foreach ($languages as $language) {
        if (preg_match('/^\\w{3}$/', $language)
            && $language != 'zxx'
            && $language != 'und'
        ) {
            $data['language'][] = $language;
        }
    }

    $data['format'] = $this->getFormat();

    // authors
    $data['author'] = $this->getFieldSubfields(
        '100',
        ['a' => 1, 'b' => 1, 'c' => 1, 'd' => 1, 'e' => 1]
    );
    $data['author_fuller'] = $this->getFieldSubfields('100', ['q' => 1]);
    $data['author-letter'] = $this->getFieldSubfields('100', ['a' => 1]);
    $data['author2'] = $this->getFieldsSubfields(
        [
            [MarcRecord::GET_ALT, '100', ['a' => 1, 'b' => 1, 'c' => 1, 'd' => 1]],
            [MarcRecord::GET_BOTH, '110', ['a' => 1, 'b' => 1]],
            [MarcRecord::GET_BOTH, '111', ['a' => 1, 'b' => 1]],
            [
                MarcRecord::GET_BOTH,
                '700',
                ['a' => 1, 'q' => 1, 'b' => 1, 'c' => 1, 'd' => 1, 'e' => 1]
            ],
            [MarcRecord::GET_BOTH, '710', ['a' => 1, 'b' => 1]],
            [MarcRecord::GET_BOTH, '711', ['a' => 1, 'b' => 1]]
        ]
    );
    // Drop the main author from the secondary authors
    $key = array_search($data['author'], $data['author2'], true);
    if ($key !== false) {
        unset($data['author2'][$key]);
    }
    // Filter first, reindex last: array_filter() preserves keys, so the
    // reverse order would leave gaps and the value would no longer be a list.
    $data['author2'] = array_values(array_filter($data['author2']));
    $data['author2-role'] = $this->getFieldsSubfields(
        [
            [MarcRecord::GET_BOTH, '700', ['e' => 1]],
            [MarcRecord::GET_BOTH, '710', ['e' => 1]]
        ],
        true
    );
    $data['author_additional'] = $this->getFieldsSubfields(
        [
            [MarcRecord::GET_BOTH, '505', ['r' => 1]]
        ],
        true
    );

    // titles
    $data['title'] = $this->getTitle();
    $data['title_sub'] = $this->getFieldSubfields(
        '245',
        ['b' => 1, 'n' => 1, 'p' => 1]
    );
    $data['title_short'] = $this->getFieldSubfields('245', ['a' => 1]);
    $data['title_full'] = $this->getFieldSubfields(
        '245',
        [
            'a' => 1, 'b' => 1, 'c' => 1, 'f' => 1, 'g' => 1,
            'h' => 1, 'k' => 1, 'n' => 1, 'p' => 1, 's' => 1
        ]
    );
    $uniformTitleSubfields = [
        'a' => 1, 'd' => 1, 'f' => 1, 'g' => 1, 'k' => 1,
        'l' => 1, 'n' => 1, 'p' => 1, 's' => 1, 't' => 1
    ];
    $data['title_alt'] = array_values(
        array_unique(
            $this->getFieldsSubfields(
                [
                    [MarcRecord::GET_ALT, '245', ['a' => 1, 'b' => 1]],
                    [MarcRecord::GET_BOTH, '130', $uniformTitleSubfields],
                    [MarcRecord::GET_BOTH, '240', ['a' => 1]],
                    [MarcRecord::GET_BOTH, '246', ['g' => 1]],
                    [MarcRecord::GET_BOTH, '730', $uniformTitleSubfields],
                    [MarcRecord::GET_BOTH, '740', ['a' => 1]]
                ]
            )
        )
    );
    $data['title_old'] = $this->getFieldsSubfields(
        [
            [MarcRecord::GET_BOTH, '780', ['a' => 1, 's' => 1, 't' => 1]]
        ]
    );
    $data['title_new'] = $this->getFieldsSubfields(
        [
            [MarcRecord::GET_BOTH, '785', ['a' => 1, 's' => 1, 't' => 1]]
        ]
    );
    $data['title_sort'] = $this->getTitle(true);
    if (!$data['title_short']) {
        // No 245 $a: fall back to field 240
        $data['title_short'] = $this->getFieldSubfields(
            '240',
            ['a' => 1, 'n' => 1, 'p' => 1]
        );
        $data['title_full'] = $this->getFieldSubfields('240');
    }

    $data['series'] = $this->getFieldsSubfields(
        [
            [MarcRecord::GET_BOTH, '440', ['a' => 1]],
            [MarcRecord::GET_BOTH, '490', ['a' => 1]],
            [
                MarcRecord::GET_BOTH,
                '800',
                [
                    'a' => 1, 'b' => 1, 'c' => 1, 'd' => 1,
                    'f' => 1, 'p' => 1, 'q' => 1, 't' => 1
                ]
            ],
            [MarcRecord::GET_BOTH, '830', ['a' => 1, 'p' => 1]]
        ]
    );

    // publisher: 260 $b, or $b of the first 264 with second indicator 1
    $data['publisher'] = $this->getFieldsSubfields(
        [
            [MarcRecord::GET_BOTH, '260', ['b' => 1]]
        ],
        false,
        true
    );
    if (!$data['publisher']) {
        $fields = $this->getFields('264');
        foreach ($fields as $field) {
            if ($this->getIndicator($field, 2) == '1') {
                $data['publisher'] = MetadataUtils::stripTrailingPunctuation(
                    $this->getSubfield($field, 'b')
                );
                break;
            }
        }
    }

    $publicationYear = $this->getPublicationYear();
    if ($publicationYear) {
        $data['publishDateSort'] = $publicationYear;
        $data['publishDate'] = [$publicationYear];
    }

    $data['physical'] = $this->getFieldsSubfields(
        [
            [
                MarcRecord::GET_BOTH,
                '300',
                ['a' => 1, 'b' => 1, 'c' => 1, 'e' => 1, 'f' => 1, 'g' => 1]
            ],
            [MarcRecord::GET_BOTH, '530', ['a' => 1, 'b' => 1, 'c' => 1, 'd' => 1]]
        ]
    );
    $data['dateSpan'] = $this->getFieldsSubfields(
        [
            [MarcRecord::GET_BOTH, '362', ['a' => 1]]
        ]
    );
    $data['edition'] = $this->getFieldSubfields('250', ['a' => 1]);
    $data['contents'] = $this->getFieldsSubfields(
        [
            [MarcRecord::GET_BOTH, '505', ['a' => 1]],
            [MarcRecord::GET_BOTH, '505', ['t' => 1]]
        ]
    );

    // isbn: the record's own ISBNs plus host item ISBNs from 773 $z,
    // dashes removed and 10-character values converted to ISBN-13
    $data['isbn'] = $this->getISBNs();
    $hostIsbns = $this->getFieldsSubfields(
        [
            [MarcRecord::GET_NORMAL, '773', ['z' => 1]]
        ]
    );
    foreach ($hostIsbns as $isbn) {
        $isbn = str_replace('-', '', $isbn);
        if (!preg_match('{([0-9]{9,12}[0-9xX])}', $isbn, $matches)) {
            continue;
        }
        $isbn = $matches[1];
        if (strlen($isbn) == 10) {
            $isbn = MetadataUtils::isbn10to13($isbn);
        }
        if ($isbn) {
            $data['isbn'][] = $isbn;
        }
    }

    // issn, dashes removed
    $data['issn'] = $this->getFieldsSubfields(
        [
            [MarcRecord::GET_NORMAL, '022', ['a' => 1]],
            [MarcRecord::GET_NORMAL, '440', ['x' => 1]],
            [MarcRecord::GET_NORMAL, '490', ['x' => 1]],
            [MarcRecord::GET_NORMAL, '730', ['x' => 1]],
            [MarcRecord::GET_NORMAL, '773', ['x' => 1]],
            [MarcRecord::GET_NORMAL, '776', ['x' => 1]],
            [MarcRecord::GET_NORMAL, '780', ['x' => 1]],
            [MarcRecord::GET_NORMAL, '785', ['x' => 1]]
        ]
    );
    foreach ($data['issn'] as &$value) {
        $value = str_replace('-', '', $value);
    }
    unset($value);

    // call numbers
    $data['callnumber-first'] = $this->getFirstFieldSubfields(
        [
            [MarcRecord::GET_NORMAL, '099', ['a' => 1]],
            [MarcRecord::GET_NORMAL, '090', ['a' => 1]],
            [MarcRecord::GET_NORMAL, '050', ['a' => 1]]
        ]
    );
    $values = $this->getFirstFieldSubfields(
        [
            [MarcRecord::GET_NORMAL, '090', ['a' => 1]],
            [MarcRecord::GET_NORMAL, '050', ['a' => 1]]
        ]
    );
    if ($values) {
        // NOTE(review): getFirstFieldSubfields() is not visible from here.
        // Accept both a list (use its first entry) and a plain string so the
        // whole call number is examined either way - confirm the return type.
        $callNumber = is_array($values)
            ? (string)reset($values) : (string)$values;

        // Subject: leading run of letters
        if (preg_match('/^([A-Z]+)/', strtoupper($callNumber), $matches)) {
            $data['callnumber-subject'] = $matches[1];
        }

        // Label: everything before the first dot, or the whole value when
        // there is no dot (or the value starts with one)
        $dotPos = strpos($callNumber, '.');
        if ($dotPos !== false && $dotPos > 0) {
            $data['callnumber-label']
                = strtoupper(substr($callNumber, 0, $dotPos));
        } else {
            $data['callnumber-label'] = strtoupper($callNumber);
        }
    }
    $data['callnumber-raw'] = array_map(
        'strtoupper',
        $this->getFieldsSubfields(
            [
                [MarcRecord::GET_NORMAL, '080', ['a' => 1, 'b' => 1]],
                [MarcRecord::GET_NORMAL, '084', ['a' => 1, 'b' => 1]],
                [MarcRecord::GET_NORMAL, '050', ['a' => 1, 'b' => 1]]
            ]
        )
    );
    // Sort key from a valid call number; when several are valid the last one
    // wins. If none is valid, fall back to the first raw call number.
    foreach ($data['callnumber-raw'] as $callnumber) {
        $cn = new LcCallNumber($callnumber);
        if ($cn->isValid()) {
            $data['callnumber-sort'] = $cn->getSortKey();
        }
    }
    if (empty($data['callnumber-sort']) && !empty($data['callnumber-raw'])) {
        $cn = new LcCallNumber($data['callnumber-raw'][0]);
        $data['callnumber-sort'] = $cn->getSortKey();
    }

    // subjects
    $data['topic'] = $this->getTopics();
    $data['genre'] = $this->getGenres();
    $data['geographic'] = $this->getGeographicTopics();
    $data['era'] = $this->getEras();
    $data['topic_facet'] = $this->getTopicFacets();
    $data['genre_facet'] = $this->getGenreFacets();
    $data['geographic_facet'] = $this->getGeographicFacets();
    $data['era_facet'] = $this->getEraFacets();

    $data['url'] = $this->getFieldsSubfields(
        [
            [MarcRecord::GET_NORMAL, '856', ['u' => 1]]
        ]
    );
    $data['illustrated'] = $this->getIllustrated();

    // TODO: dewey fields and OCLC numbers
    return $data;
}
/**
 * Dedup: Return ISBNs in ISBN-13 format without dashes
 *
 * Every identifier of the document is stripped of dashes and searched for
 * the first run of 9-12 digits followed by a digit or x/X. Ten-character
 * matches are passed through MetadataUtils::isbn10to13(); longer matches are
 * kept as found. Empty results are skipped and duplicates removed.
 *
 * @return string[] Unique ISBNs as a zero-indexed list
 */
public function getISBNs()
{
    $isbns = [];
    foreach ($this->doc->identifier as $identifier) {
        $candidate = str_replace('-', '', $identifier);
        if (preg_match('{([0-9]{9,12}[0-9xX])}', $candidate, $found)) {
            $normalized = strlen($found[1]) == 10
                ? MetadataUtils::isbn10to13($found[1])
                : $found[1];
            if ($normalized) {
                $isbns[] = $normalized;
            }
        }
    }
    return array_values(array_unique($isbns));
}
/**
 * Normalize an ISBN to ISBN-13 without dashes
 *
 * Dashes are removed and the first run of 9-12 digits followed by a digit or
 * x/X is taken as the ISBN. A ten-character match is handed to
 * MetadataUtils::isbn10to13() and its result returned; a longer match is
 * returned as found.
 *
 * @param string $isbn ISBN to normalize
 *
 * @return string Normalized ISBN or empty string
 */
public static function normalizeISBN($isbn)
{
    $stripped = str_replace('-', '', $isbn);
    if (preg_match('{([0-9]{9,12}[0-9xX])}', $stripped, $found)) {
        $candidate = $found[1];
        return strlen($candidate) == 10
            ? MetadataUtils::isbn10to13($candidate)
            : $candidate;
    }
    return '';
}