/**
 * Perform normalization and analysis of MetaLib return value
 * (a single record)
 *
 * Reads MARC fields from the record via getSingleValue()/getMultipleValues(),
 * collects links from the 856 fields, looks up the proxy flag for the
 * record's IRD, and merges in any citation details found in the embedded
 * OpenURL context object (field OPUa) or parsed out of field 773g.
 *
 * @param \SimpleXMLElement $record The xml record from MetaLib
 *
 * @return array The processed record array
 */
protected function process($record)
{
    $record->registerXPathNamespace('m', 'http://www.loc.gov/MARC21/slim');

    // TODO: can we get anything reliable from MetaLib results for format?
    $format = '';

    $title = $this->getSingleValue($record, '245ab', ' : ');
    if ($addTitle = $this->getSingleValue($record, '245h')) {
        $title .= " {$addTitle}";
    }
    $author = $this->getSingleValue($record, '100a');
    $addAuthors = $this->getMultipleValues($record, '700a');
    $sources = $this->getMultipleValues($record, 'SIDt');
    $year = $this->getSingleValue($record, 'YR a');
    $languages = $this->getMultipleValues($record, '041a');
    $publishers = $this->getMultipleValues($record, '260b');

    // Build a url => description map from the 856 fields. Subfield u is the
    // link; subfield y is used as its description when present, otherwise
    // the link itself doubles as the description.
    $urls = [];
    // xpath() may return false/null instead of an array; treat that as "none".
    $linkFields = $record->xpath("./m:datafield[@tag='856']") ?: [];
    foreach ($linkFields as $linkField) {
        $linkField->registerXPathNamespace(
            'm', 'http://www.loc.gov/MARC21/slim'
        );
        $url = $linkField->xpath("./m:subfield[@code='u']");
        if ($url) {
            $desc = $linkField->xpath("./m:subfield[@code='y']");
            $urls[(string) $url[0]] = $desc
                ? (string) $desc[0]
                : (string) $url[0];
        }
    }

    // The proxy flag comes from the IRD information of the record's source.
    $proxy = false;
    $ird = $this->getSingleValue($record, 'SIDd');
    if ($ird) {
        $info = $this->getIRDInfo($ird);
        $proxy = $info['proxy'] == 'Y';
    }

    // Field OPUa carries an XML document with OpenURL parameters. Every
    // non-empty element (except the journal/author containers) becomes a
    // query parameter keyed by its element name.
    $openurlParams = [];
    $opu = $this->getSingleValue($record, 'OPUa');
    // simplexml_load_string() returns false for unparsable input; skip the
    // OpenURL data in that case instead of calling methods on false.
    $opuxml = $opu ? simplexml_load_string($opu) : false;
    if ($opuxml !== false) {
        $opuxml->registerXPathNamespace('ctx', 'info:ofi/fmt:xml:xsd:ctx');
        $opuxml->registerXPathNamespace('rft', ''); //info:ofi/fmt:xml:xsd');
        $elements = $opuxml->xpath('//*') ?: [];
        foreach ($elements as $element) {
            $name = $element->getName();
            if (in_array($name, ['journal', 'author'], true)) {
                continue;
            }
            $value = trim((string) $element);
            if (!$value) {
                continue;
            }
            $openurlParams[$name] = $value;
            // OpenURL might have many nicely parsed elements we can use
            switch ($name) {
            case 'date':
                // Only a fallback: the year from field 'YR a' wins.
                if (empty($year)) {
                    $year = $value;
                }
                break;
            case 'volume':
                $volume = $value;
                break;
            case 'issue':
                $issue = $value;
                break;
            case 'spage':
                $startPage = $value;
                break;
            case 'epage':
                $endPage = $value;
                break;
            }
        }
    }

    $isbn = $this->getMultipleValues($record, '020a');
    $issn = $this->getMultipleValues($record, '022a');
    $snippet = $this->getMultipleValues($record, '520a');
    $subjects = $this->getMultipleValues(
        $record,
        '600abcdefghjklmnopqrstuvxyz'
        . ':610abcdefghklmnoprstuvxyz'
        . ':611acdefghjklnpqstuvxyz'
        . ':630adefghklmnoprstvxyz'
        . ':650abcdevxyz',
        ' : '
    );
    $notes = $this->getMultipleValues($record, '500a');

    // Fill in citation details from 773g, but never override values that
    // were already taken from the OpenURL data above.
    $field773g = $this->getSingleValue($record, '773g');
    $matches = [];
    if (preg_match('/(\\d*)\\s*\\((\\d{4})\\)\\s*:\\s*(\\d*)/', $field773g, $matches)) {
        // Shape "<digits> (<4 digits>) : <digits>": first group is the
        // volume, last group the issue.
        if (!isset($volume)) {
            $volume = $matches[1];
        }
        if (!isset($issue)) {
            $issue = $matches[3];
        }
    } elseif (preg_match('/(\\d{4})\\s*:\\s*(\\d*)/', $field773g, $matches)) {
        // Shape "<4 digits> : <digits>": the four-digit group is used as the
        // volume and the second group as the issue.
        if (!isset($volume)) {
            $volume = $matches[1];
        }
        if (!isset($issue)) {
            $issue = $matches[2];
        }
    }
    if (preg_match('/,\\s*\\w\\.?\\s*([\\d,\\-]+)/', $field773g, $matches)) {
        // A comma, a single word character (optionally followed by a period)
        // and then a digits/commas/hyphens run: split that run on '-' into
        // start and end page.
        $pages = explode('-', $matches[1]);
        if (!isset($startPage)) {
            $startPage = $pages[0];
        }
        if (isset($pages[1]) && !isset($endPage)) {
            $endPage = $pages[1];
        }
    }

    // Only the part of 773t before the first '. ' is used as the host title.
    $hostTitle = explode('. ', $this->getSingleValue($record, '773t'), 2);
    $year = str_replace('^^^^', '', $year);

    return [
        'title' => $title,
        'author' => $author ? $author : null,
        'author2' => $addAuthors,
        // Guard against records without any SIDt value.
        'source' => isset($sources[0]) ? $sources[0] : null,
        'publisher' => $publishers,
        'main_date_str' => $year ? $year : null,
        'publishDate' => $year ? [$year] : null,
        // explode() never returns an empty array, so test the first part
        // itself to be able to report a missing host title as null.
        'container_title' => $hostTitle[0] !== '' ? $hostTitle[0] : null,
        'openUrl' => !empty($openurlParams)
            ? http_build_query($openurlParams)
            : null,
        'url' => $urls,
        'proxy' => $proxy,
        'fullrecord' => $record->asXML(),
        'id' => '',
        'recordtype' => 'marc',
        'format' => [$format],
        'isbn' => $isbn,
        'issn' => $issn,
        'ispartof' => "{$hostTitle[0]}, {$field773g}",
        'language' => $languages,
        'topic' => $subjects,
        'description' => $snippet,
        'notes' => $notes,
        'container_volume' => isset($volume) ? $volume : '',
        'container_issue' => isset($issue) ? $issue : '',
        'container_start_page' => isset($startPage) ? $startPage : '',
        'container_end_page' => isset($endPage) ? $endPage : '',
    ];
}
/**
 * Perform normalization and analysis of MetaLib return value
 * (a single record)
 *
 * Reads MARC fields from the record via getSingleValue()/getMultipleValues(),
 * collects links from the 856 fields, looks up the proxy flag for the
 * record's IRD, and - when an OpenURL resolver is configured in
 * $configArray['OpenURL']['url'] - merges in citation details from the
 * embedded OpenURL context object (field OPUa). Remaining citation details
 * are parsed out of field 773g.
 *
 * @param \SimpleXMLElement $record The xml record from MetaLib
 *
 * @return array The processed record array
 * @access protected
 */
protected function process($record)
{
    global $configArray;

    $record->registerXPathNamespace('m', 'http://www.loc.gov/MARC21/slim');

    // TODO: can we get anything reliable from MetaLib results for format?
    $format = '';

    $title = $this->getSingleValue($record, '245ab', ' : ');
    if ($addTitle = $this->getSingleValue($record, '245h')) {
        $title .= " {$addTitle}";
    }
    $author = $this->getSingleValue($record, '100a');
    // NOTE(review): only a single 700a value is read here although the
    // result key is the plural 'AdditionalAuthors' - confirm with callers
    // whether getMultipleValues() was intended.
    $addAuthors = $this->getSingleValue($record, '700a');
    $sources = $this->getMultipleValues($record, 'SIDt');
    $year = $this->getSingleValue($record, 'YR a');
    $languages = $this->getMultipleValues($record, '041a');

    // Build a url => description map from the 856 fields. Subfield u is the
    // link; subfield y is used as its description when present, otherwise
    // the link itself doubles as the description.
    $urls = array();
    // xpath() may return false/null instead of an array; treat that as "none".
    $linkFields = $record->xpath("./m:datafield[@tag='856']");
    if ($linkFields) {
        foreach ($linkFields as $linkField) {
            $linkField->registerXPathNamespace(
                'm', 'http://www.loc.gov/MARC21/slim'
            );
            $url = $linkField->xpath("./m:subfield[@code='u']");
            if ($url) {
                $desc = $linkField->xpath("./m:subfield[@code='y']");
                $urls[(string) $url[0]] = $desc
                    ? (string) $desc[0]
                    : (string) $url[0];
            }
        }
    }

    // The proxy flag comes from the IRD information of the record's source.
    $proxy = false;
    $ird = $this->getSingleValue($record, 'SIDd');
    if ($ird) {
        $info = $this->getIRDInfo($ird);
        $proxy = $info['proxy'] == 'Y';
    }

    // Field OPUa carries an XML document with OpenURL parameters. It is only
    // evaluated when an OpenURL resolver URL is configured. Every non-empty
    // element (except the journal/author containers) becomes a query
    // parameter keyed by its element name.
    $openurl = array();
    if (!empty($configArray['OpenURL']['url'])) {
        $opu = $this->getSingleValue($record, 'OPUa');
        // simplexml_load_string() returns false for unparsable input; skip
        // the OpenURL data in that case instead of calling methods on false.
        $opuxml = $opu ? simplexml_load_string($opu) : false;
        if ($opuxml !== false) {
            $opuxml->registerXPathNamespace('ctx', 'info:ofi/fmt:xml:xsd:ctx');
            $opuxml->registerXPathNamespace('rft', ''); //info:ofi/fmt:xml:xsd');
            $elements = $opuxml->xpath('//*');
            if (!$elements) {
                $elements = array();
            }
            foreach ($elements as $element) {
                $name = $element->getName();
                if (in_array($name, array('journal', 'author'), true)) {
                    continue;
                }
                $value = trim((string) $element);
                if (!$value) {
                    continue;
                }
                $openurl[$name] = $value;
                // OpenURL might have many nicely parsed elements we can use
                switch ($name) {
                case 'date':
                    // Only a fallback: the year from field 'YR a' wins.
                    if (empty($year)) {
                        $year = $value;
                    }
                    break;
                case 'volume':
                    $volume = $value;
                    break;
                case 'issue':
                    $issue = $value;
                    break;
                case 'spage':
                    $startPage = $value;
                    break;
                case 'epage':
                    $endPage = $value;
                    break;
                }
            }
            // Append the configured referrer id, but only when there is
            // something to send and the setting actually exists.
            if (!empty($openurl) && isset($configArray['OpenURL']['rfr_id'])) {
                $openurl['rfr_id'] = $configArray['OpenURL']['rfr_id'];
            }
        }
    }

    $isbn = $this->getMultipleValues($record, '020a');
    $issn = $this->getMultipleValues($record, '022a');
    $snippet = $this->getMultipleValues($record, '520a');
    $subjects = $this->getMultipleValues(
        $record,
        '600abcdefghjklmnopqrstuvxyz'
        . ':610abcdefghklmnoprstuvxyz'
        . ':611acdefghjklnpqstuvxyz'
        . ':630adefghklmnoprstvxyz'
        . ':650abcdevxyz',
        ' : '
    );
    $notes = $this->getMultipleValues($record, '500a');

    // Fill in citation details from 773g, but never override values that
    // were already taken from the OpenURL data above.
    $field773g = $this->getSingleValue($record, '773g');
    $matches = array();
    if (preg_match('/(\\d*)\\s*\\((\\d{4})\\)\\s*:\\s*(\\d*)/', $field773g, $matches)) {
        // Shape "<digits> (<4 digits>) : <digits>": first group is the
        // volume, last group the issue.
        if (!isset($volume)) {
            $volume = $matches[1];
        }
        if (!isset($issue)) {
            $issue = $matches[3];
        }
    } elseif (preg_match('/(\\d{4})\\s*:\\s*(\\d*)/', $field773g, $matches)) {
        // Shape "<4 digits> : <digits>": the four-digit group is used as the
        // volume and the second group as the issue.
        if (!isset($volume)) {
            $volume = $matches[1];
        }
        if (!isset($issue)) {
            $issue = $matches[2];
        }
    }
    if (preg_match('/,\\s*\\w\\.?\\s*([\\d,\\-]+)/', $field773g, $matches)) {
        // A comma, a single word character (optionally followed by a period)
        // and then a digits/commas/hyphens run: split that run on '-' into
        // start and end page.
        $pages = explode('-', $matches[1]);
        if (!isset($startPage)) {
            $startPage = $pages[0];
        }
        if (isset($pages[1]) && !isset($endPage)) {
            $endPage = $pages[1];
        }
    }

    // The publication title is 773t, with 773g appended when both exist.
    $hostTitle = $this->getSingleValue($record, '773t');
    if ($hostTitle && $field773g) {
        $hostTitle .= " {$field773g}";
    }

    $year = str_replace('^^^^', '', $year);

    return array(
        'Title' => array($title),
        'Author' => $author ? array($author) : null,
        'AdditionalAuthors' => $addAuthors,
        'Source' => $sources,
        'PublicationDate' => $year ? array($year) : null,
        'PublicationTitle' => $hostTitle ? array($hostTitle) : null,
        'openUrl' => !empty($openurl) ? http_build_query($openurl) : null,
        'url' => $urls,
        'proxy' => $proxy,
        'fullrecord' => $record->asXML(),
        'id' => '',
        'recordtype' => 'marc',
        'format' => array($format),
        'ISBN' => $isbn,
        'ISSN' => $issn,
        'Language' => $languages,
        'SubjectTerms' => $subjects,
        // Snippets are only exposed when enabled on this object.
        'Snippet' => $this->snippets ? $snippet : null,
        'Notes' => $notes,
        'Volume' => isset($volume) ? $volume : '',
        'Issue' => isset($issue) ? $issue : '',
        'StartPage' => isset($startPage) ? $startPage : '',
        'EndPage' => isset($endPage) ? $endPage : ''
    );
}