Example #1
0
 /**
  * Dedup: Return record title
  *
  * The title is taken from the record's title element, trimmed, and passed
  * through MetadataUtils::stripTrailingPunctuation().
  *
  * @param bool $forFiling Whether the title is to be used in filing
  * (e.g. sorting). When true, the title is additionally passed through the
  * leading punctuation and leading article strippers and lowercased.
  *
  * @return string
  */
 public function getTitle($forFiling = false)
 {
     $displayTitle = MetadataUtils::stripTrailingPunctuation(
         trim((string) $this->doc->title)
     );
     if (!$forFiling) {
         return $displayTitle;
     }

     // Leading punctuation is stripped both before and after the article is
     // removed, since removing the article may expose further punctuation.
     $filingTitle = MetadataUtils::stripLeadingPunctuation($displayTitle);
     $filingTitle = MetadataUtils::stripLeadingArticle($filingTitle);
     $filingTitle = MetadataUtils::stripLeadingPunctuation($filingTitle);

     return mb_strtolower($filingTitle, 'UTF-8');
 }
Example #2
0
 /**
  * Get an array of all fields relevant to allfields search
  *
  * Collects subfields from tags 100-840, 856 and 880, strips leading and
  * trailing punctuation from each value and removes duplicates.
  *
  * @return string[]
  */
 protected function getAllFields()
 {
     // Filter passed to getAllSubfields() when a tag has no entry of its own
     $defaultFilter = ['6' => 1, '8' => 1];
     // Tag-specific filters passed to getAllSubfields()
     $tagFilters = [
         '650' => ['2' => 1, '6' => 1, '8' => 1],
         '773' => ['6' => 1, '7' => 1, '8' => 1, 'w' => 1],
         '856' => ['6' => 1, '8' => 1, 'q' => 1]
     ];

     $collected = [];
     foreach ($this->fields as $tag => $tagFields) {
         $inMainRange = $tag >= 100 && $tag < 841;
         if (!$inMainRange && $tag != 856 && $tag != 880) {
             continue;
         }
         $filter = isset($tagFilters[$tag]) ? $tagFilters[$tag] : $defaultFilter;
         foreach ($tagFields as $field) {
             $subfields = $this->getAllSubfields($field, $filter);
             if ($subfields) {
                 $collected = array_merge($collected, $subfields);
             }
         }
     }

     $stripPunctuation = function ($value) {
         $value = MetadataUtils::stripTrailingPunctuation($value);
         return MetadataUtils::stripLeadingPunctuation($value);
     };
     $cleaned = array_map($stripPunctuation, $collected);

     return array_values(array_unique($cleaned));
 }
Example #3
0
 /**
  * Split title to main title and description. Tries to find the first sentence
  * break where the title can be split.
  *
  * Whitespace runs are collapsed to single spaces first. A split is only
  * considered at a word that ends with a period while no parenthesis or
  * bracket opened earlier in the title is still unclosed.
  *
  * @param string $title Title to split
  *
  * @return null|string Null if title was not split, otherwise the initial
  * title part (with trailing punctuation stripped)
  */
 public static function splitTitle($title)
 {
     $i = 0;
     $parenLevel = 0;
     $bracketLevel = 0;
     // Make sure the title has single spaces for whitespace
     $title = preg_replace('/\\s+/', ' ', $title);
     $titleWords = explode(' ', $title);
     foreach ($titleWords as $word) {
         // $i is the 1-based position of $word, i.e. the 0-based index of the
         // word that follows it
         ++$i;
         $parenLevel += substr_count($word, '(');
         $parenLevel -= substr_count($word, ')');
         $bracketLevel += substr_count($word, '[');
         $bracketLevel -= substr_count($word, ']');
         if ($parenLevel != 0 || $bracketLevel != 0) {
             // Never split inside parentheses or brackets
             continue;
         }
         // Try to avoid splitting at short words or the very beginning
         if (substr($word, -1) == '.' && strlen($word) > 2 && ($i > 1 || strlen($word) > 4)) {
             // Verify that the word is strippable (not abbreviation etc.)
             $leadStripped = MetadataUtils::stripLeadingPunctuation($word);
             $stripped = MetadataUtils::stripTrailingPunctuation($leadStripped);
             $nextWord = isset($titleWords[$i]) ? $titleWords[$i] : '';
             $nextFirst = $nextWord !== '' ? substr($nextWord, 0, 1) : '';
             // 1.) There has to be something following this word. This is
             //     compared against '' explicitly, because a plain truthiness
             //     test would reject a following word starting with "0".
             // 2.) The trailing period must be strippable or end with a year.
             // 3.) Next word has to start with a capital or digit
             //     (ctype_lower() is byte-based, so a non-ASCII first byte is
             //     never treated as lowercase).
             // 4.) Not something like 12-p.
             // 5.) Not initials like A.N.
             if ($nextFirst !== ''
                 && ($leadStripped != $stripped || preg_match('/^\\d{4}\\.$/', $word))
                 && (is_numeric($nextFirst) || !ctype_lower($nextFirst))
                 && !preg_match('/.+\\-\\w{1,2}\\.$/', $word)
                 && !preg_match('/^\\w\\.\\w\\.$/', $word)
             ) {
                 // array_slice() reads the first $i words without modifying
                 // the array being iterated
                 return MetadataUtils::stripTrailingPunctuation(
                     implode(' ', array_slice($titleWords, 0, $i))
                 );
             }
         }
     }
     return null;
 }
Example #4
0
 /**
  * Return record title
  *
  * Titles are collected from the appellation values of the title set nodes.
  * If the resulting title equals the object work type (case-insensitively),
  * the descriptive notes of the description sets are used instead when any
  * exist.
  *
  * @param bool     $forFiling            Whether the title is to be used in
  * filing (e.g. sorting, non-filing characters should be removed)
  * @param string   $lang                 Language
  * @param string[] $excludedDescriptions Description types to exclude
  *
  * @return string|null Null when the record has no titles at all
  */
 public function getTitle($forFiling = false, $lang = null, $excludedDescriptions = ['provenance'])
 {
     $anyLanguage = $lang == null;
     $languageTitles = [];
     $everyTitle = [];
     foreach ($this->getTitleSetNodes() as $titleSet) {
         foreach ($titleSet->appellationValue as $value) {
             $text = (string) $value;
             $everyTitle[] = $text;
             if ($anyLanguage || $value['lang'] == $lang) {
                 $languageTitles[] = $text;
             }
         }
     }

     // Fallback to use any title in case none found with the specified language
     $candidates = $languageTitles ? $languageTitles : $everyTitle;
     if (!$candidates) {
         return null;
     }

     $title = implode('; ', array_unique(array_filter($candidates)));

     // Use description if title is the same as the work type
     // From LIDO specs:
     // "For objects from natural, technical, cultural history e.g. the object
     // name given here and the object type, recorded in the object / work
     // type element are often identical."
     if (strcasecmp($this->getObjectWorkType(), $title) == 0) {
         $notes = [];
         foreach ($this->getObjectDescriptionSetNodes($excludedDescriptions) as $descriptionSet) {
             if ($descriptionSet->descriptiveNoteValue) {
                 $notes[] = (string) $descriptionSet->descriptiveNoteValue;
             }
         }
         if ($notes) {
             $title = implode('; ', $notes);
         }
     }

     return $forFiling
         ? MetadataUtils::stripLeadingPunctuation($title)
         : $title;
 }
Example #5
0
 /**
  * Return fields to be indexed in Solr
  *
  * Builds the index document from the record XML: identifiers, description,
  * authors, geographic names, topics, format, institution, titles, languages,
  * physical extents, thumbnail and hierarchy information.
  *
  * @param bool $prependTitleWithSubtitle If true and title_sub differs from
  * title_short, title is formed by combining title_sub and title_short
  *
  * @return array Solr field names mapped to string values or arrays of values
  */
 public function toSolrArray($prependTitleWithSubtitle)
 {
     $data = [];
     $doc = $this->doc;
     $data['ctrlnum'] = (string) $doc->attributes()->{'id'};
     $data['fullrecord'] = MetadataUtils::trimXMLWhitespace($doc->asXML());
     $data['allfields'] = $this->getAllFields($doc);

     if ($doc->scopecontent) {
         if ($doc->scopecontent->p) {
             // Join all p-elements into a flat string.
             $desc = [];
             foreach ($doc->scopecontent->p as $p) {
                 $desc[] = trim((string) $p);
             }
             $desc = implode('   /   ', $desc);
         } else {
             $desc = (string) $doc->scopecontent;
         }
         $data['description'] = $desc;
     }

     // Authors: person names first (a lone "-" is skipped), then corporate names
     $authors = [];
     if ($names = $doc->xpath('controlaccess/persname')) {
         foreach ($names as $name) {
             $trimmed = trim((string) $name);
             if ($trimmed !== '-') {
                 $authors[] = $trimmed;
             }
         }
     }
     if ($names = $doc->xpath('controlaccess/corpname')) {
         foreach ($names as $name) {
             $authors[] = trim((string) $name);
         }
     }
     if ($authors) {
         // The first author is the primary one, any others go to author2
         $data['author'] = array_shift($authors);
         $data['author-letter'] = $data['author'];
     }
     if ($authors) {
         $data['author2'] = $authors;
     }
     if ($doc->did->origination) {
         $data['author_additional'] = trim((string) $doc->did->origination->corpname);
     }

     if ($geoNames = $doc->xpath('controlaccess/geogname')) {
         $names = [];
         foreach ($geoNames as $name) {
             $trimmed = trim((string) $name);
             if ($trimmed !== '-') {
                 $names[] = $trimmed;
             }
         }
         $data['geographic'] = $data['geographic_facet'] = $names;
     }

     if ($subjects = $doc->xpath('controlaccess/subject')) {
         $topics = [];
         foreach ($subjects as $subject) {
             $trimmed = trim((string) $subject);
             if ($trimmed !== '-') {
                 $topics[] = $trimmed;
             }
         }
         $data['topic'] = $data['topic_facet'] = $topics;
     }

     // Format is the first genreform if present, otherwise the level attribute
     $genre = $doc->xpath('controlaccess/genreform');
     $data['format'] = (string) ($genre ? $genre[0] : $doc->attributes()->level);

     if (isset($doc->did->repository)) {
         // The ternary is parenthesised so that the cast applies to the selected
         // element. A cast binds tighter than ?:, so without the parentheses it
         // would be applied to the isset() result and the element object itself
         // would be stored.
         $data['institution'] = (string) (
             isset($doc->did->repository->corpname)
                 ? $doc->did->repository->corpname
                 : $doc->did->repository
         );
     }

     $data['title_sub'] = '';
     switch ($data['format']) {
         case 'fonds':
         case 'collection':
             // No subtitle for these formats
             break;
         case 'series':
         case 'subseries':
             $data['title_sub'] = (string) $doc->did->unitid;
             break;
         default:
             $data['title_sub'] = (string) $doc->did->unitid;
             if ($doc->{'add-data'}->parent) {
                 $data['series'] = (string) $doc->{'add-data'}->parent->attributes()->unittitle;
             }
             break;
     }

     $data['title_short'] = (string) $doc->did->unittitle;
     $data['title'] = '';
     if ($prependTitleWithSubtitle) {
         if ($data['title_sub'] && $data['title_sub'] != $data['title_short']) {
             $data['title'] = $data['title_sub'] . ' ';
         }
     }
     $data['title'] .= $data['title_short'];
     $data['title_full'] = $data['title_sort'] = $data['title'];
     $data['title_sort'] = mb_strtolower(MetadataUtils::stripLeadingPunctuation($data['title_sort']), 'UTF-8');

     if ($languages = $doc->did->xpath('langmaterial/language')) {
         foreach ($languages as $lang) {
             if (isset($lang->attributes()->langcode)) {
                 $langCode = trim((string) $lang->attributes()->langcode);
                 if ($langCode != '') {
                     $data['language'][] = $langCode;
                 }
             }
         }
     }

     if ($extents = $doc->did->xpath('physdesc/extent')) {
         foreach ($extents as $extent) {
             if (trim((string) $extent) !== '-') {
                 $data['physical'][] = (string) $extent;
             }
         }
     }

     // Only look for thumbnails when a daogrp element exists, so that xpath()
     // is never invoked on a missing node
     $nodes = isset($doc->did->daogrp)
         ? $doc->did->daogrp->xpath('daoloc[@role="image_thumbnail"]')
         : [];
     if ($nodes) {
         // store first thumbnail
         $node = $nodes[0];
         if (isset($node->attributes()->href)) {
             $data['thumbnail'] = (string) $node->attributes()->href;
         }
     }

     $data['hierarchytype'] = 'Default';
     if ($doc->{'add-data'}->archive) {
         $archiveAttr = $doc->{'add-data'}->archive->attributes();
         $data['hierarchy_top_id'] = (string) $archiveAttr->{'id'};
         $data['hierarchy_top_title'] = (string) $archiveAttr->title;
         if ($archiveAttr->subtitle) {
             $data['hierarchy_top_title'] .= ' : ' . (string) $archiveAttr->subtitle;
         }
         $data['allfields'][] = $data['hierarchy_top_title'];
         if ($archiveAttr->sequence) {
             $data['hierarchy_sequence'] = (string) $archiveAttr->sequence;
         }
     }
     if ($doc->{'add-data'}->{'parent'}) {
         $data['hierarchy_parent_id'] = (string) $doc->{'add-data'}->{'parent'}->attributes()->{'id'};
         $data['allfields'][] = $data['hierarchy_parent_title'] = (string) $doc->{'add-data'}->{'parent'}->attributes()->title;
     } else {
         // A record without a parent is itself the top of its hierarchy
         $data['is_hierarchy_id'] = $data['hierarchy_top_id'] = $this->getID();
         $data['is_hierarchy_title'] = $data['hierarchy_top_title'] = (string) $doc->did->unittitle;
     }

     return $data;
 }
Example #6
0
 /**
  * Get an array of all fields relevant to allfields search
  *
  * Starts with the ISBNs from field 020 (as found, plus the normalized form
  * when normalization yields a value), then adds subfields from tags
  * 100-840 (except 336 and 337), 856, 880 and 979. Leading and trailing
  * punctuation is stripped from every value and duplicates are removed.
  *
  * @return string[]
  */
 protected function getAllFields()
 {
     // Filter passed to getAllSubfields() when a tag has no entry of its own
     $fallbackFilter = ['0' => 1, '6' => 1, '8' => 1];
     // Tag-specific filters passed to getAllSubfields()
     $filtersByTag = [
         '650' => ['0' => 1, '2' => 1, '6' => 1, '8' => 1],
         '773' => ['0' => 1, '6' => 1, '7' => 1, '8' => 1, 'w' => 1],
         '856' => ['0' => 1, '6' => 1, '8' => 1, 'q' => 1],
         '979' => ['0' => 1, 'a' => 1, 'f' => 1]
     ];

     $values = [];

     // Include ISBNs, also normalized if possible
     foreach ($this->getFields('020') as $isbnField) {
         foreach ($this->getSubfieldsArray($isbnField, ['a' => 1, 'z' => 1]) as $rawIsbn) {
             $values[] = $rawIsbn;
             $normalized = MetadataUtils::normalizeISBN($rawIsbn);
             if ($normalized) {
                 $values[] = $normalized;
             }
         }
     }

     foreach ($this->fields as $tag => $tagFields) {
         $inMainRange = $tag >= 100 && $tag < 841 && $tag != 336 && $tag != 337;
         $isExtraTag = $tag == 856 || $tag == 880 || $tag == 979;
         if (!$inMainRange && !$isExtraTag) {
             continue;
         }
         $filter = isset($filtersByTag[$tag]) ? $filtersByTag[$tag] : $fallbackFilter;
         foreach ($tagFields as $field) {
             $subfields = $this->getAllSubfields($field, $filter);
             if ($subfields) {
                 $values = array_merge($values, $subfields);
             }
         }
     }

     $stripPunctuation = function ($value) {
         $value = MetadataUtils::stripTrailingPunctuation($value);
         return MetadataUtils::stripLeadingPunctuation($value);
     };

     return array_values(array_unique(array_map($stripPunctuation, $values)));
 }