/**
 * Compare a title field's non-filing-character indicator with the leading
 * article (if any) found at the start of its subfield a.
 *
 * Adapted from code by Ian Hamilton, in a reduced form: the article list
 * concentrates on English, Spanish, French, Italian and German, words that
 * are also valid English non-articles have been left out, and the
 * 008_language/041 codes are not consulted -- the word list alone drives
 * the warnings/suggestions.
 *
 * Article list source: <http://www.loc.gov/marc/bibliographic/bdapp-e.html>
 *
 * Tags 130, 630 and 730 are checked against the 1st indicator; tags 240,
 * 245, 440 and 830 against the 2nd. An 880 field is treated as the tag named
 * by the first three characters of its subfield 6. Any other tag results in
 * an internal-error warning and no further checking.
 *
 * @param File_MARC_Field $field Field to check
 *
 * @return void
 */
protected function checkArticle($field)
{
    // Words treated as articles (add more as needed). Only the keys are
    // looked up below. Some candidates are omitted because they collide with
    // ordinary words (e.g. the German 'die').
    static $knownArticles = array(
        'a'     => 'eng glg hun por',
        'an'    => 'eng',
        'das'   => 'ger',
        'dem'   => 'ger',
        'der'   => 'ger',
        'ein'   => 'ger',
        'eine'  => 'ger',
        'einem' => 'ger',
        'einen' => 'ger',
        'einer' => 'ger',
        'eines' => 'ger',
        'el'    => 'spa',
        'en'    => 'cat dan nor swe',
        'gl'    => 'ita',
        'gli'   => 'ita',
        'il'    => 'ita mlt',
        'l'     => 'cat fre ita mlt',
        'la'    => 'cat fre ita spa',
        'las'   => 'spa',
        'le'    => 'fre ita',
        'les'   => 'cat fre',
        'lo'    => 'ita spa',
        'los'   => 'spa',
        'os'    => 'por',
        'the'   => 'eng',
        'um'    => 'por',
        'uma'   => 'por',
        'un'    => 'cat spa fre ita',
        'una'   => 'cat spa ita',
        'une'   => 'fre',
        'uno'   => 'ita',
    );

    // Title openings whose first word looks like an article but is not one
    // (add more as needed). Matching is case-sensitive.
    static $notArticlePrefixes = array(
        'A & E',
        'A & ',
        'A-',
        'A+',
        'A is ',
        'A isn\'t ',
        'A l\'',
        'A la ',
        'A posteriori',
        'A priori',
        'A to ',
        'El Nino',
        'El Salvador',
        'L is ',
        'L-',
        'La Salle',
        'Las Vegas',
        'Lo mein',
        'Los Alamos',
        'Los Angeles',
    );

    // The tag decides which indicator is inspected and labels the warnings.
    // An 880 field borrows the tag from the start of its subfield 6.
    $tagNo = $field->getTag();
    if ($tagNo == '880') {
        $linkage = $field->getSubfield('6');
        if ($linkage) {
            $tagNo = substr($linkage->getData(), 0, 3);
        }
    }

    // $ind is the non-filing indicator value; $first_or_second names which
    // indicator it came from, for use in the warning text.
    if (in_array($tagNo, array(130, 630, 730))) {
        $first_or_second = '1st';
        $ind = $field->getIndicator(1);
    } elseif (in_array($tagNo, array(240, 245, 440, 830))) {
        $first_or_second = '2nd';
        $ind = $field->getIndicator(2);
    } else {
        $this->warn(
            'Internal error: ' . $tagNo
            . " is not a valid field for article checking\n"
        );
        return;
    }

    if (!is_numeric($ind)) {
        $this->warn($tagNo . ": Non-filing indicator is non-numeric");
        return;
    }

    // Title text comes from subfield a; a missing subfield means an empty
    // title.
    $title = '';
    $subfieldA = $field->getSubfield('a');
    if ($subfieldA) {
        $title = $subfieldA->getData();
    }

    // Peel off any apostrophe, quote, bracket, parenthesis or asterisk that
    // precedes the first word, one character at a time, counting each one.
    $leadingPunctuation = '/^["\'\\[\\(*]/';
    for ($stripped = 0; preg_match($leadingPunctuation, $title); $stripped++) {
        $title = preg_replace($leadingPunctuation, '', $title);
    }

    // Break the title into first word, the single separator after it (space,
    // paren, bracket, apostrophe, quote or hyphen) and everything else. All
    // three stay empty when the title does not start with a word.
    $firstword = '';
    $delimiter = '';
    $remainder = '';
    $matched = preg_match(
        '/^([^ \\(\\)\\[\\]\'"\\-]+)([ \\(\\)\\[\\]\'"\\-])?(.*)/i',
        $title,
        $pieces
    );
    if ($matched) {
        list(, $firstword, $delimiter, $remainder) = $pieces;
    }

    // Expected indicator value so far: first word, the characters stripped
    // from the front, plus one for the separator.
    $expectedCount = strlen($firstword) + $stripped + 1;

    // Does the (punctuation-stripped) title open with a known exception?
    $isException = false;
    foreach ($notArticlePrefixes as $prefix) {
        if (strncmp($title, $prefix, strlen($prefix)) === 0) {
            $isException = true;
            break;
        }
    }

    // The article list is keyed in lowercase.
    $firstword = strtolower($firstword);

    // Not an article (or explicitly excepted): the indicator should be 0.
    if ($isException || !isset($knownArticles[$firstword])) {
        if ($ind != '0') {
            $this->warn(
                $tagNo
                . ": First word, {$firstword}, does not appear to be an "
                . "article, check {$first_or_second} indicator ({$ind})."
            );
        }
        return;
    }

    // An article: also count spaces, quotes, apostrophes, parens, brackets
    // or asterisks that sit between the separator and the second word.
    if ($delimiter !== ''
        && preg_match('/^[ \\(\\)\\[\\]\'"\\-]+/', $remainder)
    ) {
        $beforeSecondWord = '/^[ "\'\\[\\]\\(\\)*]/';
        while (preg_match($beforeSecondWord, $remainder)) {
            $remainder = preg_replace($beforeSecondWord, '', $remainder);
            $expectedCount++;
        }
    }

    if ($expectedCount != $ind) {
        $this->warn(
            $tagNo
            . ": First word, {$firstword}, may be an article, check "
            . "{$first_or_second} indicator ({$ind})."
        );
    }
}
/**
 * Destroys the control field
 *
 * Clears this object's data property and then runs the parent destructor.
 * Visibility is declared explicitly; it matches the implicit default, so
 * callers and subclasses are unaffected.
 */
public function __destruct()
{
    $this->data = null;
    parent::__destruct();
}
/**
 * Destroys the data field
 *
 * Clears this object's subfields and both indicator properties, then runs
 * the parent destructor. Visibility is declared explicitly; it matches the
 * implicit default, so callers and subclasses are unaffected.
 */
public function __destruct()
{
    $this->subfields = null;
    $this->ind1 = null;
    $this->ind2 = null;
    parent::__destruct();
}