Beispiel #1
0
 /**
  * Return fields to be indexed in Solr (an alternative to an XSL transformation)
  *
  * Starts from the parent's field array and adds date fields, publication
  * date ranges, a single language code and the data source identifier.
  *
  * @return array Solr field name => value(s)
  */
 public function toSolrArray()
 {
     $data = parent::toSolrArray();

     // Date fields are only added when the parent produced a publishDate
     if (isset($data['publishDate'])) {
         $data['main_date_str'] = MetadataUtils::extractYear($data['publishDate']);
         $data['main_date'] = $this->validateDate($this->getPublicationYear() . '-01-01T00:00:00Z');
     }

     // The same range value is stored in the dedicated publication field and
     // appended to the multi-valued search field
     if ($range = $this->getPublicationDateRange()) {
         $data['search_sdaterange_mv'][] = $data['publication_sdaterange'] = MetadataUtils::dateRangeToNumeric($range);
         $data['search_daterange_mv'][] = $data['publication_daterange'] = MetadataUtils::dateRangeToStr($range);
     }

     // language, take only first. Tokens must be 2-3 lowercase letters and
     // must not be the 'zxx' or 'und' codes. If nothing qualifies,
     // array_shift() yields null and the field is set to null.
     $languages = array_filter(explode(' ', (string) $this->doc->language), function ($value) {
         return preg_match('/^[a-z]{2,3}$/', $value) && $value !== 'zxx' && $value !== 'und';
     });
     $data['language'] = array_shift($languages);

     $data['source_str_mv'] = $this->source;
     $data['datasource_str_mv'] = $this->source;
     return $data;
 }
Beispiel #2
0
 /**
  * Return fields to be indexed in Solr (an alternative to an XSL transformation)
  *
  * Prefixes record IDs in the linking entry fields, then extends the parent's
  * field array with building, coordinates, identifiers, full record, language,
  * format, author, title, series, publisher, physical description, ISBN/ISSN,
  * call number, subject and URL fields extracted from the MARC data.
  *
  * NOTE(review): the linking-field step rewrites $this->fields in place, so a
  * second call on the same instance would apparently prefix the IDs again --
  * confirm callers invoke this once per record.
  *
  * @return array Solr field name => value(s)
  */
 public function toSolrArray()
 {
     // Add source prefix to IDs in link fields
     $fields = ['760', '762', '765', '767', '770', '772', '773', '774', '775', '776', '777', '780', '785', '786', '787'];
     foreach ($fields as $code) {
         if (isset($this->fields[$code])) {
             foreach ($this->fields[$code] as &$marcfield) {
                 if (isset($marcfield['s'])) {
                     foreach ($marcfield['s'] as &$marcsubfield) {
                         // Strict comparison is required: numeric subfield codes
                         // become integer array keys, and 0 == 'w' is true
                         // before PHP 8.
                         if (key($marcsubfield) === 'w') {
                             $marcsubfield['w'] = $this->idPrefix . '.' . $marcsubfield['w'];
                         }
                     }
                     // Break the reference so later code cannot write through it
                     unset($marcsubfield);
                 }
             }
             unset($marcfield);
         }
     }
     $data = parent::toSolrArray();
     // building
     $data['building'] = [];
     if ($this->getDriverParam('holdingsInBuilding', true)) {
         foreach ($this->getFields('852') as $field) {
             $location = $this->getSubfield($field, 'b');
             if ($location) {
                 $data['building'][] = $location;
             }
         }
     }
     // long_lat
     $field = $this->getField('034');
     if ($field) {
         $westOrig = $this->getSubfield($field, 'd');
         $eastOrig = $this->getSubfield($field, 'e');
         $northOrig = $this->getSubfield($field, 'f');
         $southOrig = $this->getSubfield($field, 'g');
         $west = MetadataUtils::coordinateToDecimal($westOrig);
         $east = MetadataUtils::coordinateToDecimal($eastOrig);
         $north = MetadataUtils::coordinateToDecimal($northOrig);
         $south = MetadataUtils::coordinateToDecimal($southOrig);
         // West and north are mandatory; east/south, when present, turn the
         // point into the midpoint of the bounding box.
         if (!is_nan($west) && !is_nan($north)) {
             if (!is_nan($east)) {
                 $longitude = ($west + $east) / 2;
             } else {
                 $longitude = $west;
             }
             if (!is_nan($south)) {
                 $latitude = ($north + $south) / 2;
             } else {
                 $latitude = $north;
             }
             if ($longitude < -180 || $longitude > 180 || ($latitude < -90 || $latitude > 90)) {
                 global $logger;
                 $logger->log('MarcRecord', "Discarding invalid coordinates {$longitude},{$latitude} " . "decoded from w={$westOrig}, e={$eastOrig}, n={$northOrig}, " . "s={$southOrig}, record {$this->source}." . $this->getID(), Logger::WARNING);
             } else {
                 $data['long_lat'] = "{$longitude},{$latitude}";
             }
         }
     }
     // lccn
     $data['lccn'] = $this->getFieldSubfields('010', ['a' => 1]);
     $data['ctrlnum'] = $this->getFieldsSubfields([[MarcRecord::GET_NORMAL, '035', ['a' => 1]]]);
     $data['fullrecord'] = $this->toISO2709();
     if (!$data['fullrecord']) {
         // In case the record exceeds 99999 bytes...
         $data['fullrecord'] = $this->toXML();
     }
     $data['allfields'] = $this->getAllFields();
     // language: keep three-character codes except 'zxx' and 'und'
     $languages = $this->getLanguages();
     foreach ($languages as $language) {
         if (preg_match('/^\\w{3}$/', $language) && $language != 'zxx' && $language != 'und') {
             $data['language'][] = $language;
         }
     }
     $data['format'] = $this->getFormat();
     $data['author'] = $this->getFieldSubfields('100', ['a' => 1, 'b' => 1, 'c' => 1, 'd' => 1, 'e' => 1]);
     $data['author_fuller'] = $this->getFieldSubfields('100', ['q' => 1]);
     $data['author-letter'] = $this->getFieldSubfields('100', ['a' => 1]);
     $data['author2'] = $this->getFieldsSubfields([[MarcRecord::GET_ALT, '100', ['a' => 1, 'b' => 1, 'c' => 1, 'd' => 1]], [MarcRecord::GET_BOTH, '110', ['a' => 1, 'b' => 1]], [MarcRecord::GET_BOTH, '111', ['a' => 1, 'b' => 1]], [MarcRecord::GET_BOTH, '700', ['a' => 1, 'q' => 1, 'b' => 1, 'c' => 1, 'd' => 1, 'e' => 1]], [MarcRecord::GET_BOTH, '710', ['a' => 1, 'b' => 1]], [MarcRecord::GET_BOTH, '711', ['a' => 1, 'b' => 1]]]);
     // Drop the main author from the secondary authors, then remove empty
     // entries
     $key = array_search($data['author'], $data['author2']);
     if ($key !== false) {
         unset($data['author2'][$key]);
     }
     $data['author2'] = array_filter(array_values($data['author2']));
     $data['author2-role'] = $this->getFieldsSubfields([[MarcRecord::GET_BOTH, '700', ['e' => 1]], [MarcRecord::GET_BOTH, '710', ['e' => 1]]], true);
     $data['author_additional'] = $this->getFieldsSubfields([[MarcRecord::GET_BOTH, '505', ['r' => 1]]], true);
     $data['title'] = $this->getTitle();
     $data['title_sub'] = $this->getFieldSubfields('245', ['b' => 1, 'n' => 1, 'p' => 1]);
     $data['title_short'] = $this->getFieldSubfields('245', ['a' => 1]);
     $data['title_full'] = $this->getFieldSubfields('245', ['a' => 1, 'b' => 1, 'c' => 1, 'f' => 1, 'g' => 1, 'h' => 1, 'k' => 1, 'n' => 1, 'p' => 1, 's' => 1]);
     $data['title_alt'] = array_values(array_unique($this->getFieldsSubfields([[MarcRecord::GET_ALT, '245', ['a' => 1, 'b' => 1]], [MarcRecord::GET_BOTH, '130', ['a' => 1, 'd' => 1, 'f' => 1, 'g' => 1, 'k' => 1, 'l' => 1, 'n' => 1, 'p' => 1, 's' => 1, 't' => 1]], [MarcRecord::GET_BOTH, '240', ['a' => 1]], [MarcRecord::GET_BOTH, '246', ['g' => 1]], [MarcRecord::GET_BOTH, '730', ['a' => 1, 'd' => 1, 'f' => 1, 'g' => 1, 'k' => 1, 'l' => 1, 'n' => 1, 'p' => 1, 's' => 1, 't' => 1]], [MarcRecord::GET_BOTH, '740', ['a' => 1]]])));
     $data['title_old'] = $this->getFieldsSubfields([[MarcRecord::GET_BOTH, '780', ['a' => 1, 's' => 1, 't' => 1]]]);
     $data['title_new'] = $this->getFieldsSubfields([[MarcRecord::GET_BOTH, '785', ['a' => 1, 's' => 1, 't' => 1]]]);
     $data['title_sort'] = $this->getTitle(true);
     // Fall back to field 240 when 245$a produced nothing
     if (!$data['title_short']) {
         $data['title_short'] = $this->getFieldSubfields('240', ['a' => 1, 'n' => 1, 'p' => 1]);
         $data['title_full'] = $this->getFieldSubfields('240');
     }
     $data['series'] = $this->getFieldsSubfields([[MarcRecord::GET_BOTH, '440', ['a' => 1]], [MarcRecord::GET_BOTH, '490', ['a' => 1]], [MarcRecord::GET_BOTH, '800', ['a' => 1, 'b' => 1, 'c' => 1, 'd' => 1, 'f' => 1, 'p' => 1, 'q' => 1, 't' => 1]], [MarcRecord::GET_BOTH, '830', ['a' => 1, 'p' => 1]]]);
     $data['publisher'] = $this->getFieldsSubfields([[MarcRecord::GET_BOTH, '260', ['b' => 1]]], false, true);
     if (!$data['publisher']) {
         // No 260$b: use $b of the first 264 field whose second indicator is 1
         $fields = $this->getFields('264');
         foreach ($fields as $field) {
             if ($this->getIndicator($field, 2) == '1') {
                 $data['publisher'] = MetadataUtils::stripTrailingPunctuation($this->getSubfield($field, 'b'));
                 break;
             }
         }
     }
     $publicationYear = $this->getPublicationYear();
     if ($publicationYear) {
         $data['publishDateSort'] = $publicationYear;
         $data['publishDate'] = [$publicationYear];
     }
     $data['physical'] = $this->getFieldsSubfields([[MarcRecord::GET_BOTH, '300', ['a' => 1, 'b' => 1, 'c' => 1, 'e' => 1, 'f' => 1, 'g' => 1]], [MarcRecord::GET_BOTH, '530', ['a' => 1, 'b' => 1, 'c' => 1, 'd' => 1]]]);
     $data['dateSpan'] = $this->getFieldsSubfields([[MarcRecord::GET_BOTH, '362', ['a' => 1]]]);
     $data['edition'] = $this->getFieldSubfields('250', ['a' => 1]);
     $data['contents'] = $this->getFieldsSubfields([[MarcRecord::GET_BOTH, '505', ['a' => 1]], [MarcRecord::GET_BOTH, '505', ['t' => 1]]]);
     $data['isbn'] = $this->getISBNs();
     // Add ISBNs from 773$z: strip hyphens, pick the first 10-13 character
     // candidate and convert 10-character values with isbn10to13()
     foreach ($this->getFieldsSubfields([[MarcRecord::GET_NORMAL, '773', ['z' => 1]]]) as $isbn) {
         $isbn = str_replace('-', '', $isbn);
         if (!preg_match('{([0-9]{9,12}[0-9xX])}', $isbn, $matches)) {
             continue;
         }
         $isbn = $matches[1];
         if (strlen($isbn) == 10) {
             $isbn = MetadataUtils::isbn10to13($isbn);
         }
         if ($isbn) {
             $data['isbn'][] = $isbn;
         }
     }
     $data['issn'] = $this->getFieldsSubfields([[MarcRecord::GET_NORMAL, '022', ['a' => 1]], [MarcRecord::GET_NORMAL, '440', ['x' => 1]], [MarcRecord::GET_NORMAL, '490', ['x' => 1]], [MarcRecord::GET_NORMAL, '730', ['x' => 1]], [MarcRecord::GET_NORMAL, '773', ['x' => 1]], [MarcRecord::GET_NORMAL, '776', ['x' => 1]], [MarcRecord::GET_NORMAL, '780', ['x' => 1]], [MarcRecord::GET_NORMAL, '785', ['x' => 1]]]);
     foreach ($data['issn'] as &$value) {
         $value = str_replace('-', '', $value);
     }
     // Break the reference so a later write to $value cannot alter the list
     unset($value);
     $data['callnumber-first'] = $this->getFirstFieldSubfields([[MarcRecord::GET_NORMAL, '099', ['a' => 1]], [MarcRecord::GET_NORMAL, '090', ['a' => 1]], [MarcRecord::GET_NORMAL, '050', ['a' => 1]]]);
     // NOTE(review): the code below indexes $values[0] as a string, which
     // assumes getFirstFieldSubfields() returns a list of strings -- confirm.
     $values = $this->getFirstFieldSubfields([[MarcRecord::GET_NORMAL, '090', ['a' => 1]], [MarcRecord::GET_NORMAL, '050', ['a' => 1]]]);
     if ($values) {
         if (preg_match('/^([A-Z]+)/', strtoupper($values[0]), $matches)) {
             $data['callnumber-subject'] = $matches[1];
         }
         // The label is the part before the first dot. strpos() is needed for
         // a position (strstr() returns the remaining string), and the label
         // must be cut from the same value the dot was searched in.
         $dotPos = strpos($values[0], '.');
         if ($dotPos !== false && $dotPos > 0) {
             $data['callnumber-label'] = strtoupper(substr($values[0], 0, $dotPos));
         } else {
             $data['callnumber-label'] = strtoupper($values[0]);
         }
     }
     $data['callnumber-raw'] = array_map('strtoupper', $this->getFieldsSubfields([[MarcRecord::GET_NORMAL, '080', ['a' => 1, 'b' => 1]], [MarcRecord::GET_NORMAL, '084', ['a' => 1, 'b' => 1]], [MarcRecord::GET_NORMAL, '050', ['a' => 1, 'b' => 1]]]));
     // The sort key of the last valid call number wins
     foreach ($data['callnumber-raw'] as $callnumber) {
         $cn = new LcCallNumber($callnumber);
         if ($cn->isValid()) {
             $data['callnumber-sort'] = $cn->getSortKey();
         }
     }
     // None was valid: use the sort key of the first raw call number anyway
     if (empty($data['callnumber-sort']) && !empty($data['callnumber-raw'])) {
         $cn = new LcCallNumber($data['callnumber-raw'][0]);
         $data['callnumber-sort'] = $cn->getSortKey();
     }
     $data['topic'] = $this->getTopics();
     $data['genre'] = $this->getGenres();
     $data['geographic'] = $this->getGeographicTopics();
     $data['era'] = $this->getEras();
     $data['topic_facet'] = $this->getTopicFacets();
     $data['genre_facet'] = $this->getGenreFacets();
     $data['geographic_facet'] = $this->getGeographicFacets();
     $data['era_facet'] = $this->getEraFacets();
     $data['url'] = $this->getFieldsSubfields([[MarcRecord::GET_NORMAL, '856', ['u' => 1]]]);
     $data['illustrated'] = $this->getIllustrated();
     // TODO: dewey fields and OCLC numbers
     return $data;
 }
 /**
  * Split title to main title and description. Tries to find the first sentence
  * break where the title can be split.
  *
  * Words are examined in order; a split is only considered outside of
  * parentheses and square brackets, at a word ending with a period.
  *
  * @param string $title Title to split
  *
  * @return null|string Null if title was not split, otherwise the initial
  * title part
  */
 public static function splitTitle($title)
 {
     $i = 0;
     $parenLevel = 0;
     $bracketLevel = 0;
     // Make sure the title has single spaces for whitespace
     $title = preg_replace('/\\s+/', ' ', $title);
     $titleWords = explode(' ', $title);
     foreach ($titleWords as $word) {
         // After the increment $i is the number of words seen so far, and
         // therefore also the index of the next word.
         ++$i;
         $parenLevel += substr_count($word, '(');
         $parenLevel -= substr_count($word, ')');
         $bracketLevel += substr_count($word, '[');
         $bracketLevel -= substr_count($word, ']');
         if ($parenLevel == 0 && $bracketLevel == 0) {
             // Try to avoid splitting at short words or the very beginning
             if (substr($word, -1) == '.' && strlen($word) > 2 && ($i > 1 || strlen($word) > 4)) {
                 // Verify that the word is strippable (not abbreviation etc.)
                 $leadStripped = MetadataUtils::stripLeadingPunctuation($word);
                 $stripped = MetadataUtils::stripTrailingPunctuation($leadStripped);
                 // Cast to string because substr() returns false for an empty
                 // word on PHP < 8; an empty next word must count as "nothing
                 // follows".
                 $nextFirst = isset($titleWords[$i]) ? (string) substr($titleWords[$i], 0, 1) : '';
                 // 1.) There has to be something following this word.
                 // 2.) The trailing period must be strippable or end with a year.
                 // 3.) Next word has to start with a capital or digit
                 // 4.) Not something like 12-p.
                 // 5.) Not initials like A.N.
                 // Rule 1 compares against '' explicitly: a plain truthiness
                 // test would reject a next word starting with the digit '0'.
                 if ($nextFirst !== '' && ($leadStripped != $stripped || preg_match('/^\\d{4}\\.$/', $word)) && (is_numeric($nextFirst) || !ctype_lower($nextFirst)) && !preg_match('/.+\\-\\w{1,2}\\.$/', $word) && !preg_match('/^\\w\\.\\w\\.$/', $word)) {
                     // array_slice() leaves the array being iterated untouched
                     return MetadataUtils::stripTrailingPunctuation(implode(' ', array_slice($titleWords, 0, $i)));
                 }
             }
         }
     }
     return null;
 }