/**
 * Fetch a file to be harvested and repair known problems in its XML.
 *
 * The raw content is obtained from the parent implementation and then
 * post-processed:
 *  - the MARC21 slim default-namespace declaration on the opening
 *    <collection> tag is dropped;
 *  - rows beginning with <main> or <sub> get their ampersands encoded as
 *    XML entities;
 *  - rows beginning with <line> are blanked (the row is kept as an empty
 *    string, so the number of rows does not change).
 *
 * @param string $filename File to retrieve
 *
 * @return string xml
 * @throws Exception
 */
protected function retrieveFile($filename)
{
    $data = parent::retrieveFile($filename);

    // Remove the namespace declaration. Helps process the file, and it's
    // invalid anyway.
    $data = str_replace(
        '<collection xmlns="http://www.loc.gov/MARC21/slim">',
        '<collection>',
        $data
    );

    // Fix the data one row at a time. Matching is done on the very start of
    // each row, so indented tags are left untouched.
    $dataRows = explode("\n", $data);
    foreach ($dataRows as &$row) {
        if (strncmp($row, '<main>', 6) === 0
            || strncmp($row, '<sub>', 5) === 0
        ) {
            // Looks like the category data comes in without proper encoding
            // of characters that must be encoded in XML, so a bare '&' has
            // to become the '&amp;' entity.
            $row = str_replace('&', '&amp;', $row);
        } elseif (strncmp($row, '<line>', 6) === 0) {
            // Remove all the <line>... stuff
            $row = '';
        }
    }
    // Break the reference so later use of $row cannot silently modify the
    // last element of $dataRows.
    unset($row);

    return implode("\n", $dataRows);
}
/**
 * Retrieve a harvested file with the MARC21 slim namespace removed.
 *
 * The content comes from the parent implementation; the only change made
 * here is replacing the namespaced opening <collection> tag with a plain
 * one.
 *
 * @param string $filename File to retrieve
 *
 * @return string xml
 * @throws Exception
 */
protected function retrieveFile($filename)
{
    $namespacedTag = '<collection xmlns="http://www.loc.gov/MARC21/slim">';
    $plainTag = '<collection>';

    return str_replace(
        $namespacedTag,
        $plainTag,
        parent::retrieveFile($filename)
    );
}