/**
 * Dedup: Return record title
 *
 * The title is read from the document's title element, trimmed and stripped
 * of trailing punctuation. The filing form is additionally stripped of
 * leading punctuation and a leading article, and lowercased.
 *
 * @param bool $forFiling Whether the title is to be used in filing
 * (e.g. sorting, non-filing characters should be removed)
 *
 * @return string
 */
public function getTitle($forFiling = false)
{
    $displayTitle = MetadataUtils::stripTrailingPunctuation(
        trim((string) $this->doc->title)
    );
    if (!$forFiling) {
        return $displayTitle;
    }

    $filingTitle = MetadataUtils::stripLeadingArticle(
        MetadataUtils::stripLeadingPunctuation($displayTitle)
    );
    // Removing the article may have exposed more leading punctuation,
    // so strip it once more before lowercasing
    return mb_strtolower(
        MetadataUtils::stripLeadingPunctuation($filingTitle),
        'UTF-8'
    );
}
/**
 * Get an array of all fields relevant to allfields search
 *
 * Collects subfield contents from tags 100-840, 856 and 880, leaving out the
 * subfield codes listed per tag below (or 6 and 8 by default), strips leading
 * and trailing punctuation from every value and removes duplicates.
 *
 * @return string[]
 */
protected function getAllFields()
{
    // Subfield codes to leave out, per tag
    $excludedByTag = [
        '650' => ['2' => 1, '6' => 1, '8' => 1],
        '773' => ['6' => 1, '7' => 1, '8' => 1, 'w' => 1],
        '856' => ['6' => 1, '8' => 1, 'q' => 1]
    ];
    $defaultExcluded = ['6' => 1, '8' => 1];

    $collected = [];
    foreach ($this->fields as $tag => $tagFields) {
        $included = ($tag >= 100 && $tag < 841) || $tag == 856 || $tag == 880;
        if (!$included) {
            continue;
        }
        $excluded = isset($excludedByTag[$tag])
            ? $excludedByTag[$tag]
            : $defaultExcluded;
        foreach ($tagFields as $tagField) {
            $values = $this->getAllSubfields($tagField, $excluded);
            if (!$values) {
                continue;
            }
            $collected = array_merge($collected, $values);
        }
    }

    $normalized = [];
    foreach ($collected as $value) {
        $normalized[] = MetadataUtils::stripLeadingPunctuation(
            MetadataUtils::stripTrailingPunctuation($value)
        );
    }

    return array_values(array_unique($normalized));
}
/**
 * Split title to main title and description. Tries to find the first sentence
 * break where the title can be split.
 *
 * Words inside parentheses or brackets are never used as a split point.
 *
 * @param string $title Title to split
 *
 * @return null|string Null if title was not split, otherwise the initial
 * title part
 */
public static function splitTitle($title)
{
    $i = 0;
    $parenLevel = 0;
    $bracketLevel = 0;
    // Make sure the title has single spaces for whitespace
    $title = preg_replace('/\\s+/', ' ', $title);
    $titleWords = explode(' ', $title);
    foreach ($titleWords as $word) {
        ++$i;
        $parenLevel += substr_count($word, '(');
        $parenLevel -= substr_count($word, ')');
        $bracketLevel += substr_count($word, '[');
        $bracketLevel -= substr_count($word, ']');
        if ($parenLevel != 0 || $bracketLevel != 0) {
            continue;
        }
        // Try to avoid splitting at short words or the very beginning
        $length = strlen($word);
        if (substr($word, -1) !== '.' || $length <= 2
            || ($i <= 1 && $length <= 4)
        ) {
            continue;
        }
        // Verify that the word is strippable (not abbreviation etc.)
        $leadStripped = MetadataUtils::stripLeadingPunctuation($word);
        $stripped = MetadataUtils::stripTrailingPunctuation($leadStripped);
        // $i has already been incremented, so it indexes the next word.
        // The cast keeps the value a string even where substr() returns
        // false for an empty word.
        $nextFirst = isset($titleWords[$i])
            ? (string) substr($titleWords[$i], 0, 1)
            : '';
        // 1.) There has to be something following this word. Compare to an
        //     empty string explicitly: a plain truthiness check would treat
        //     a next word starting with "0" as missing.
        // 2.) The trailing period must be strippable or end with a year.
        // 3.) Next word has to start with a capital or digit
        // 4.) Not something like 12-p.
        // 5.) Not initials like A.N.
        if ($nextFirst !== ''
            && ($leadStripped != $stripped
            || preg_match('/^\\d{4}\\.$/', $word))
            && (is_numeric($nextFirst) || !ctype_lower($nextFirst))
            && !preg_match('/.+\\-\\w{1,2}\\.$/', $word)
            && !preg_match('/^\\w\\.\\w\\.$/', $word)
        ) {
            // array_slice returns the first $i words without modifying
            // the word list
            return MetadataUtils::stripTrailingPunctuation(
                implode(' ', array_slice($titleWords, 0, $i))
            );
        }
    }
    return null;
}
/**
 * Return record title
 *
 * Titles are taken from the appellation values of the title sets. If none
 * matches the requested language, all available titles are used. When the
 * resulting title equals the object work type (case-insensitively), the
 * descriptive notes are used instead if there are any.
 *
 * @param bool     $forFiling            Whether the title is to be used in
 * filing (e.g. sorting, non-filing characters should be removed)
 * @param string   $lang                 Language
 * @param string[] $excludedDescriptions Description types to exclude
 *
 * @return string|null Title, or null if the record has no title values
 */
public function getTitle($forFiling = false, $lang = null,
    $excludedDescriptions = ['provenance']
) {
    $matchingTitles = [];
    $anyTitles = [];
    foreach ($this->getTitleSetNodes() as $titleSet) {
        foreach ($titleSet->appellationValue as $value) {
            $text = (string) $value;
            $anyTitles[] = $text;
            if ($lang == null || $value['lang'] == $lang) {
                $matchingTitles[] = $text;
            }
        }
    }

    // Fallback to use any title in case none found with the specified language
    $candidates = empty($matchingTitles) ? $anyTitles : $matchingTitles;
    if (empty($candidates)) {
        return null;
    }

    $title = implode('; ', array_unique(array_filter($candidates)));

    // Use description if title is the same as the work type
    // From LIDO specs:
    // "For objects from natural, technical, cultural history e.g. the object
    // name given here and the object type, recorded in the object / work
    // type element are often identical."
    if (strcasecmp($this->getObjectWorkType(), $title) == 0) {
        $notes = [];
        $descriptionSets
            = $this->getObjectDescriptionSetNodes($excludedDescriptions);
        foreach ($descriptionSets as $descriptionSet) {
            if ($descriptionSet->descriptiveNoteValue) {
                $notes[] = (string) $descriptionSet->descriptiveNoteValue;
            }
        }
        if ($notes) {
            $title = implode('; ', $notes);
        }
    }

    return $forFiling
        ? MetadataUtils::stripLeadingPunctuation($title)
        : $title;
}
/**
 * Return fields to be indexed in Solr
 *
 * Builds the index array from the record document: identifiers, full record,
 * description, authors, geographic names, topics, format, institution,
 * titles, languages, physical extents, thumbnail and hierarchy fields.
 *
 * @param bool $prependTitleWithSubtitle If true and title_sub differs from
 * title_short, title is formed by combining title_sub and title_short
 *
 * @return array Field name => string or array of strings
 */
public function toSolrArray($prependTitleWithSubtitle)
{
    $data = [];

    $doc = $this->doc;
    $data['ctrlnum'] = (string) $doc->attributes()->{'id'};
    $data['fullrecord'] = MetadataUtils::trimXMLWhitespace($doc->asXML());
    $data['allfields'] = $this->getAllFields($doc);

    if ($doc->scopecontent) {
        if ($doc->scopecontent->p) {
            // Join all p-elements into a flat string.
            $desc = [];
            foreach ($doc->scopecontent->p as $p) {
                $desc[] = trim((string) $p);
            }
            $desc = implode(' / ', $desc);
        } else {
            $desc = (string) $doc->scopecontent;
        }
        $data['description'] = $desc;
    }

    // Person names first (a lone "-" is skipped), then corporate names
    $authors = [];
    if ($names = $doc->xpath('controlaccess/persname')) {
        foreach ($names as $name) {
            if (trim((string) $name) !== '-') {
                $authors[] = trim((string) $name);
            }
        }
    }
    if ($names = $doc->xpath('controlaccess/corpname')) {
        foreach ($names as $name) {
            $authors[] = trim((string) $name);
        }
    }
    // The first name becomes the main author, the rest go to author2
    if ($authors) {
        $data['author'] = array_shift($authors);
        $data['author-letter'] = $data['author'];
    }
    if ($authors) {
        $data['author2'] = $authors;
    }

    if ($doc->did->origination) {
        $data['author_additional'] = trim(
            (string) $doc->did->origination->corpname
        );
    }

    if ($geoNames = $doc->xpath('controlaccess/geogname')) {
        $names = [];
        foreach ($geoNames as $name) {
            if (trim((string) $name) !== '-') {
                $names[] = trim((string) $name);
            }
        }
        $data['geographic'] = $data['geographic_facet'] = $names;
    }

    if ($subjects = $doc->xpath('controlaccess/subject')) {
        $topics = [];
        foreach ($subjects as $subject) {
            if (trim((string) $subject) !== '-') {
                $topics[] = trim((string) $subject);
            }
        }
        $data['topic'] = $data['topic_facet'] = $topics;
    }

    // Format is the first genreform if present, otherwise the level attribute
    $genre = $doc->xpath('controlaccess/genreform');
    $data['format'] = (string) ($genre
        ? $genre[0]
        : $doc->attributes()->level);

    if (isset($doc->did->repository)) {
        $repository = $doc->did->repository;
        // The ternary must be parenthesised: a cast binds tighter than ?:,
        // so without the parentheses the cast would apply to the isset()
        // result and the field would receive an XML element, not a string.
        $data['institution'] = (string) (isset($repository->corpname)
            ? $repository->corpname
            : $repository);
    }

    $data['title_sub'] = '';

    switch ($data['format']) {
    case 'fonds':
    case 'collection':
        // No subtitle for these formats
        break;
    case 'series':
    case 'subseries':
        $data['title_sub'] = (string) $doc->did->unitid;
        break;
    default:
        $data['title_sub'] = (string) $doc->did->unitid;
        if ($doc->{'add-data'}->parent) {
            $data['series']
                = (string) $doc->{'add-data'}->parent->attributes()->unittitle;
        }
        break;
    }

    $data['title_short'] = (string) $doc->did->unittitle;
    $data['title'] = '';
    if ($prependTitleWithSubtitle) {
        if ($data['title_sub'] && $data['title_sub'] != $data['title_short']) {
            $data['title'] = $data['title_sub'] . ' ';
        }
    }
    $data['title'] .= $data['title_short'];
    $data['title_full'] = $data['title'];
    $data['title_sort'] = mb_strtolower(
        MetadataUtils::stripLeadingPunctuation($data['title']),
        'UTF-8'
    );

    if ($languages = $doc->did->xpath('langmaterial/language')) {
        foreach ($languages as $lang) {
            if (isset($lang->attributes()->langcode)) {
                $langCode = trim((string) $lang->attributes()->langcode);
                if ($langCode != '') {
                    $data['language'][] = $langCode;
                }
            }
        }
    }

    if ($extents = $doc->did->xpath('physdesc/extent')) {
        foreach ($extents as $extent) {
            if (trim((string) $extent) !== '-') {
                $data['physical'][] = (string) $extent;
            }
        }
    }

    $nodes = $doc->did->daogrp->xpath('daoloc[@role="image_thumbnail"]');
    if ($nodes) {
        // store first thumbnail
        $node = $nodes[0];
        if (isset($node->attributes()->href)) {
            $data['thumbnail'] = (string) $node->attributes()->href;
        }
    }

    $data['hierarchytype'] = 'Default';
    if ($doc->{'add-data'}->archive) {
        $archiveAttr = $doc->{'add-data'}->archive->attributes();
        $data['hierarchy_top_id'] = (string) $archiveAttr->{'id'};
        $data['hierarchy_top_title'] = (string) $archiveAttr->title;
        if ($archiveAttr->subtitle) {
            $data['hierarchy_top_title']
                .= ' : ' . (string) $archiveAttr->subtitle;
        }
        $data['allfields'][] = $data['hierarchy_top_title'];
        if ($archiveAttr->sequence) {
            $data['hierarchy_sequence'] = (string) $archiveAttr->sequence;
        }
    }
    if ($doc->{'add-data'}->{'parent'}) {
        $parentAttr = $doc->{'add-data'}->{'parent'}->attributes();
        $data['hierarchy_parent_id'] = (string) $parentAttr->{'id'};
        $data['allfields'][] = $data['hierarchy_parent_title']
            = (string) $parentAttr->title;
    } else {
        // No parent: the record itself is the top of the hierarchy
        $data['is_hierarchy_id'] = $data['hierarchy_top_id'] = $this->getID();
        $data['is_hierarchy_title'] = $data['hierarchy_top_title']
            = (string) $doc->did->unittitle;
    }

    return $data;
}
/**
 * Get an array of all fields relevant to allfields search
 *
 * Starts with the ISBNs from field 020 (subfields a and z), each added as
 * given and also in normalized form when normalization yields a value. Then
 * adds subfield contents from tags 100-840 (except 336 and 337), 856, 880
 * and 979, leaving out the subfield codes listed per tag below (or 0, 6 and
 * 8 by default). Every value is stripped of leading and trailing punctuation
 * and duplicates are removed.
 *
 * @return string[]
 */
protected function getAllFields()
{
    // Subfield codes to leave out, per tag
    $excludedByTag = [
        '650' => ['0' => 1, '2' => 1, '6' => 1, '8' => 1],
        '773' => ['0' => 1, '6' => 1, '7' => 1, '8' => 1, 'w' => 1],
        '856' => ['0' => 1, '6' => 1, '8' => 1, 'q' => 1],
        '979' => ['0' => 1, 'a' => 1, 'f' => 1]
    ];
    $defaultExcluded = ['0' => 1, '6' => 1, '8' => 1];

    $collected = [];

    // Include ISBNs, also normalized if possible
    foreach ($this->getFields('020') as $isbnField) {
        $rawIsbns = $this->getSubfieldsArray($isbnField, ['a' => 1, 'z' => 1]);
        foreach ($rawIsbns as $rawIsbn) {
            $collected[] = $rawIsbn;
            $normalizedIsbn = MetadataUtils::normalizeISBN($rawIsbn);
            if ($normalizedIsbn) {
                $collected[] = $normalizedIsbn;
            }
        }
    }

    foreach ($this->fields as $tag => $tagFields) {
        $inMainRange = $tag >= 100 && $tag < 841 && $tag != 336 && $tag != 337;
        $included = $inMainRange || $tag == 856 || $tag == 880 || $tag == 979;
        if (!$included) {
            continue;
        }
        $excluded = isset($excludedByTag[$tag])
            ? $excludedByTag[$tag]
            : $defaultExcluded;
        foreach ($tagFields as $tagField) {
            $values = $this->getAllSubfields($tagField, $excluded);
            if (!$values) {
                continue;
            }
            $collected = array_merge($collected, $values);
        }
    }

    $normalized = [];
    foreach ($collected as $value) {
        $normalized[] = MetadataUtils::stripLeadingPunctuation(
            MetadataUtils::stripTrailingPunctuation($value)
        );
    }

    return array_values(array_unique($normalized));
}