Exemplo n.º 1
0
 /**
  * Fetches item/holdings records from the availability server and caches them.
  *
  * The supplied ids are classified by prefix: entries containing "ISBN:" are
  * ignored, an entry containing "OCLC:" is taken as the OCLC number, and any
  * other entry is taken as the bibliographic id (the last one wins if several
  * are supplied). The bibliographic id, or failing that the OCLC number with
  * its prefix removed, becomes the cache id.
  *
  * @param array $arrIDs  identifiers to look up; all of them are sent to the server
  *
  * @return Xerxes_Record_Items  the items found; empty if no holdings URL is configured
  *
  * @throws Exception  if neither a bibliographic id nor an OCLC number was supplied,
  *                    or if the availability server returned no data
  */
 protected function getHoldings($arrIDs)
 {
     $items = new Xerxes_Record_Items();
     $bib_id = "";
     // bibliographic id number
     $oclc = "";
     // oclc number
     // figure out what is what
     foreach ($arrIDs as $id) {
         if (stristr($id, "ISBN:")) {
             continue;
         }
         if (stristr($id, "OCLC:")) {
             $oclc = $id;
         } else {
             $bib_id = $id;
         }
     }
     // id used in cache: prefer the bib id, otherwise fall back to the oclc number
     if ($bib_id == "") {
         if ($oclc == "") {
             throw new Exception("no bibliographic id or oclc number suppled in availability lookup");
         }
         $cache_id = str_replace("OCLC:", "", $oclc);
     } else {
         $cache_id = $bib_id;
     }
     // get url to availability server
     $strSource = $this->getSource();
     $url = $this->getHoldingsURL($strSource);
     // no holdings source defined or somehow id's are blank
     if ($url == null || count($arrIDs) == 0) {
         return $items;
         // empty items
     }
     // get the data
     $url .= "?action=status&id=" . urlencode(implode(" ", $arrIDs));
     $data = Xerxes_Framework_Parser::request($url);
     // no data, what's up with that?
     if ($data == "") {
         throw new Exception("could not connect to availability server");
     }
     // response is (currently) an array of json objects
     $arrResults = json_decode($data);
     // parse the response; anything that did not decode to an array is treated as "no items"
     if (is_array($arrResults)) {
         // now just slot them into our item object
         foreach ($arrResults as $holding) {
             // skip malformed entries (scalars, nulls, nested arrays): property_exists()
             // and the property loop below are only meaningful for decoded json objects
             if (!is_object($holding)) {
                 continue;
             }
             // an entry carrying a "holding" property is a holdings statement,
             // anything else is an individual item
             if (property_exists($holding, "holding")) {
                 $item = new Xerxes_Record_Holding();
             } else {
                 $item = new Xerxes_Record_Item();
             }
             foreach ($holding as $property => $value) {
                 $item->setProperty($property, $value);
             }
             $items->addItem($item);
         }
     }
     // cache it for the future
     // expiry defaults to two hours (in seconds) unless HOLDINGS_CACHE_EXPIRY is configured
     $expiry = $this->config->getConfig("HOLDINGS_CACHE_EXPIRY", false, 2 * 60 * 60);
     $expiry += time();
     $cache = new Xerxes_Data_Cache();
     $cache->source = $this->getSource();
     $cache->id = $cache_id;
     $cache->expiry = $expiry;
     $cache->data = serialize($items);
     $this->data_map->setCache($cache);
     return $items;
 }
Exemplo n.º 2
0
 /**
  * Maps the marc data to the object's properties
  *
  * Reads, in order: embedded item records, an OpenURL context object (if one
  * is present in the document), and the MARC control/data fields, and fills in
  * this record's identifiers, titles, authors, subjects, links, journal
  * citation data, language, format, thesis data and summary. Values from the
  * context object are generally used only as a fallback when the MARC fields
  * are empty. Finishes by de-duplicating authors and standard numbers and by
  * stripping trailing punctuation from the display fields.
  */
 protected function map()
 {
     // item data in the XML?
     foreach ($this->document->getElementsByTagName("item") as $item_record) {
         $item = new Xerxes_Record_Item();
         $item->loadXML($item_record);
         $this->addItem($item);
     }
     ## openurl
     // the source can contain an openurl context object buried in it as well as marc-xml
     // test to see what profile the context object is using; set namespace accordingly
     if ($this->document->getElementsByTagNameNS("info:ofi/fmt:xml:xsd:book", "book")->item(0) != null) {
         $this->xpath->registerNamespace("rft", "info:ofi/fmt:xml:xsd:book");
     } elseif ($this->document->getElementsByTagNameNS("info:ofi/fmt:xml:xsd:dissertation", "dissertation")->item(0) != null) {
         $this->xpath->registerNamespace("rft", "info:ofi/fmt:xml:xsd:dissertation");
     } elseif ($this->document->getElementsByTagNameNS("info:ofi/fmt:xml:xsd", "journal")->item(0) != null) {
         $this->xpath->registerNamespace("rft", "info:ofi/fmt:xml:xsd");
     } else {
         $this->xpath->registerNamespace("rft", "info:ofi/fmt:xml:xsd:journal");
     }
     // context object:
     // these just in case
     $objATitle = $this->xpath->query("//rft:atitle")->item(0);
     $objBTitle = $this->xpath->query("//rft:btitle")->item(0);
     $objAuthors = $this->xpath->query("//rft:author[rft:aulast != '' or rft:aucorp != '']");
     $objGenre = $this->xpath->query("//rft:genre")->item(0);
     $objDate = $this->xpath->query("//rft:date")->item(0);
     // journal title, volume, issue, pages from context object
     $objTitle = $this->xpath->query("//rft:title")->item(0);
     $objSTitle = $this->xpath->query("//rft:stitle")->item(0);
     $objJTitle = $this->xpath->query("//rft:jtitle")->item(0);
     $objVolume = $this->xpath->query("//rft:volume")->item(0);
     $objIssue = $this->xpath->query("//rft:issue")->item(0);
     $objStartPage = $this->xpath->query("//rft:spage")->item(0);
     $objEndPage = $this->xpath->query("//rft:epage")->item(0);
     $objISSN = $this->xpath->query("//rft:issn")->item(0);
     $objISBN = $this->xpath->query("//rft:isbn")->item(0);
     if ($objSTitle != null) {
         $this->short_title = $objSTitle->nodeValue;
     }
     if ($objVolume != null) {
         $this->volume = $objVolume->nodeValue;
     }
     if ($objIssue != null) {
         $this->issue = $objIssue->nodeValue;
     }
     if ($objStartPage != null) {
         $this->start_page = $objStartPage->nodeValue;
     }
     if ($objEndPage != null) {
         $this->end_page = $objEndPage->nodeValue;
     }
     if ($objISBN != null) {
         array_push($this->isbns, $objISBN->nodeValue);
     }
     if ($objISSN != null) {
         array_push($this->issns, $objISSN->nodeValue);
     }
     if ($objGenre != null) {
         array_push($this->format_array, $objGenre->nodeValue);
     }
     // control and standard numbers
     $this->control_number = $this->controlfield("001")->__toString();
     $this->record_id = $this->control_number;
     $arrIssn = $this->fieldArray("022", "a");
     $arrIsbn = $this->fieldArray("020", "az");
     $this->govdoc_number = $this->datafield("086")->subfield("a")->__toString();
     $this->gpo_number = $this->datafield("074")->subfield("a")->__toString();
     // doi
     // this is kind of iffy since the 024 is not _really_ a DOI field; but this
     // is the most likely marc field; however need to see if the number follows the very loose
     // pattern of the DOI of 'prefix/suffix', where prefix and suffix can be nearly anything
     $field_024 = $this->fieldArray("024", "a");
     foreach ($field_024 as $doi) {
         // strip any doi: prefix
         $doi = str_ireplace("doi:", "", $doi);
         $doi = str_ireplace("doi", "", $doi);
         // got it! (first 024$a containing a slash wins)
         if (preg_match('/.*\\/.*/', $doi)) {
             $this->doi = $doi;
             break;
         }
     }
     // the issn of the host journal (773$x) counts as one of this record's issns
     $strJournalIssn = $this->datafield("773")->subfield("x")->__toString();
     if ($strJournalIssn != null) {
         array_push($arrIssn, $strJournalIssn);
     }
     // call number: prefer the 050, fall back to the local 090
     $strCallNumber = $this->datafield("050")->__toString();
     $strCallNumberLocal = $this->datafield("090")->__toString();
     if ($strCallNumber != null) {
         $this->call_number = $strCallNumber;
     } elseif ($strCallNumberLocal != null) {
         $this->call_number = $strCallNumberLocal;
     }
     // format
     $this->technology = $this->datafield("538")->subfield("a")->__toString();
     $arrFormat = $this->fieldArray("513", "a");
     foreach ($arrFormat as $format) {
         array_push($this->format_array, $format);
     }
     $strTitleFormat = $this->datafield("245")->subfield("hk")->__toString();
     if ($strTitleFormat != null) {
         array_push($this->format_array, $strTitleFormat);
     }
     // thesis degree, institution, date awarded (parsed further down)
     $strThesis = $this->datafield("502")->subfield("a")->__toString();
     ### title
     $this->title = $this->datafield("245")->subfield("anp")->__toString();
     $this->sub_title = $this->datafield("245")->subfield("b")->__toString();
     $this->series_title = $this->datafield("440")->subfield("a")->__toString();
     $this->uniform_title = $this->datafield("130|240")->__toString();
     // sometimes the title appears in a 242 or even a 246 if it is translated from another
     // language, although the latter is probably bad practice.  We will only take these
     // if the title in the 245 is blank, and take a 242 over the 246
     $strTransTitle = $this->datafield("242")->subfield("a")->__toString();
     $strTransSubTitle = $this->datafield("242")->subfield("b")->__toString();
     $strVaryingTitle = $this->datafield("246")->subfield("a")->__toString();
     $strVaryingSubTitle = $this->datafield("246")->subfield("b")->__toString();
     if ($this->title == "" && $strTransTitle != "") {
         $this->title = $strTransTitle;
         $this->trans_title = true;
     } elseif ($this->title == "" && $strVaryingTitle != "") {
         $this->title = $strVaryingTitle;
         $this->trans_title = true;
     }
     if ($this->sub_title == "" && $strTransSubTitle != "") {
         // take the translated *sub*title (242$b), not the translated main title
         $this->sub_title = $strTransSubTitle;
         $this->trans_title = true;
     } elseif ($this->sub_title == "" && $strVaryingSubTitle != "") {
         $this->sub_title = $strVaryingSubTitle;
         $this->trans_title = true;
     }
     // alternate titles
     foreach ($this->datafield("246") as $varying_title) {
         array_push($this->alternate_titles, $varying_title->__toString());
     }
     // last chance, check the context object
     if ($this->title == "" && $objATitle != null) {
         $this->title = $objATitle->nodeValue;
     } elseif ($this->title == "" && $objBTitle != null) {
         $this->title = $objBTitle->nodeValue;
     }
     // additional titles for display
     foreach ($this->datafield('730|740') as $additional_titles) {
         $subfields = $additional_titles->subfield()->__toString();
         array_push($this->additional_titles, $subfields);
     }
     ### exception: 245|c is remainder of title, not statement of responsibility
     $statement_of_responsiblity = (string) $this->datafield("245")->subfield("c");
     $title_parts = preg_split('/\\W/', $statement_of_responsiblity);
     $found = false;
     // NOTE(review): preg_split() without PREG_SPLIT_NO_EMPTY yields empty-string
     // tokens wherever punctuation sits next to whitespace or the string end, so an
     // empty token on both sides also counts as a match here -- confirm that is intended
     foreach ($this->datafield("100|111|700|710|711") as $author) {
         $author_parts = preg_split('/\\W/', (string) $author);
         foreach ($author_parts as $author_part) {
             if (in_array($author_part, $title_parts)) {
                 $found = true;
             }
         }
     }
     // if the 245|c doesn't include *any* terms from any of the author fields, then this is likely
     // the continuation of the title, rather than the statement of responsibility, and
     // so we need to include it in the title proper
     if ($found == false && (string) $this->datafield("245") != "") {
         $this->title = (string) $this->datafield("245")->subfield("acnp");
         // added 'c'
     }
     // edition, extent, description
     $this->edition = $this->datafield("250")->subfield("a")->__toString();
     $this->extent = $this->datafield("300")->subfield("a")->__toString();
     $this->description = $this->datafield("300")->__toString();
     $this->price = $this->datafield("365")->__toString();
     // publisher
     $this->place = $this->datafield("260")->subfield("a")->__toString();
     $this->publisher = $this->datafield("260")->subfield("b")->__toString();
     // date (turned into a year further down)
     $strDate = $this->datafield("260")->subfield("c")->__toString();
     // notes
     $arrToc = $this->fieldArray("505", "agrt");
     foreach ($arrToc as $toc) {
         $this->toc .= $toc;
     }
     $arrAbstract = $this->fieldArray("520", "a");
     $strLanguageNote = $this->datafield("546")->subfield("a")->__toString();
     // other notes: every 5XX except the toc (505), abstract (520) and language note (546)
     $objNotes = $this->xpath("//marc:datafield[@tag >= 500 and @tag < 600 and @tag != 505 and @tag != 520 and @tag != 546]");
     foreach ($objNotes as $objNote) {
         array_push($this->notes, $objNote->nodeValue);
     }
     // subjects
     // we'll exclude the numeric subfields since they contain information about the
     // source of the subject terms, which are probably not needed for display?
     foreach ($this->datafield("6XX") as $subject) {
         $subfields = $subject->subfield("abcdefghijklmnopqrstuvwxyz");
         $subfields_array = array();
         foreach ($subfields as $subfield) {
             array_push($subfields_array, $subfield->__toString());
         }
         $subject_object = new Xerxes_Record_Subject();
         $subject_object->display = implode(" -- ", $subfields_array);
         $subject_object->value = $subfields->__toString();
         array_push($this->subjects, $subject_object);
     }
     // series information
     foreach ($this->datafield('4XX|800|810|811|830') as $subject) {
         array_push($this->series, $subject->__toString());
     }
     // journal
     // specify the order of the subfields in 773 for journal as $a $t $g and then everything else
     //  in case they are out of order
     $this->journal = $this->datafield("773")->subfield("atgbcdefhijklmnopqrsuvwxyz1234567890", true)->__toString();
     $strJournal = $this->datafield("773")->subfield("agpqt")->__toString();
     $this->journal_title = $this->datafield("773")->subfield("t")->__toString();
     // NOTE(review): this unconditionally replaces any short title taken from the
     // context object (rft:stitle) above, even when the 773$p is empty -- confirm intended
     $this->short_title = $this->datafield("773")->subfield("p")->__toString();
     $strExtentHost = $this->datafield("773")->subfield("h")->__toString();
     // continues and continued by
     $this->journal_title_continues = (string) $this->datafield("780")->subfield('at');
     $this->journal_title_continued_by = (string) $this->datafield("785")->subfield('at');
     // alternate character-scripts
     // the 880 represents an alternative character-script, like Hebrew or CJK;
     // for simplicity's sake, we just dump them all here in an array, with the
     // intent of displaying them in paragraphs together in the interface or something?
     // we get every field except for the $6 which is a linking field
     $this->alt_scripts = $this->fieldArray("880", "abcdefghijklmnopqrstuvwxyz12345789");
     // now use the $6 to figure out which character-script this is
     // assume just one for now
     $strAltScript = $this->datafield("880")->subfield("6")->__toString();
     if ($strAltScript != null) {
         $arrMatchCodes = array();
         $arrScriptCodes = array("(3" => "Arabic", "(B" => "Latin", '$1' => "CJK", "(N" => "Cyrillic", "(S" => "Greek", "(2" => "Hebrew");
         // the script code is whatever follows the "NNN-NN/" linkage prefix, up to the next slash
         if (preg_match('/[0-9]{3}-[0-9]{2}\\/([^\\/]*)/', $strAltScript, $arrMatchCodes)) {
             if (array_key_exists($arrMatchCodes[1], $arrScriptCodes)) {
                 $this->alt_script_name = $arrScriptCodes[$arrMatchCodes[1]];
             }
         }
     }
     ### volume, issue, pagination
     // a best guess extraction of volume, issue, pages from 773
     $arrRegExJournal = $this->parseJournalData($strJournal);
     // some sources include ^ as a filler character in issn/isbn; skip those values
     foreach ($arrIssn as $strIssn) {
         if (strpos($strIssn, "^") === false) {
             array_push($this->issns, $strIssn);
         }
     }
     foreach ($arrIsbn as $strIsbn) {
         if (strpos($strIsbn, "^") === false) {
             array_push($this->isbns, $strIsbn);
         }
     }
     ### language
     $langConverter = Xerxes_Framework_Languages::getInstance();
     // take an explicit language note over 008 if available
     if ($strLanguageNote != null) {
         $strLanguageNote = $this->stripEndPunctuation($strLanguageNote, ".");
         if (strlen($strLanguageNote) == 2) {
             $this->language = $langConverter->getNameFromCode('iso_639_1_code', $strLanguageNote);
         } elseif (strlen($strLanguageNote) == 3) {
             $this->language = $langConverter->getNameFromCode('iso_639_2B_code', $strLanguageNote);
         } elseif (!stristr($strLanguageNote, "Undetermined")) {
             // free-text note such as "In English": drop the "In " and try to normalize the name
             $this->language = str_ireplace("In ", "", $strLanguageNote);
             $language = $langConverter->getNameFromCode('name', ucfirst($this->language));
             if ($language != null) {
                 $this->language = $language;
             }
         }
     } else {
         // get the language code from the 008; keep the control field object itself
         // (not its string value) so the positional lookup below can actually run
         $objLang = $this->controlfield("008");
         if ($objLang instanceof Xerxes_Marc_ControlField) {
             $strLangCode = $objLang->position("35-37");
             if ($strLangCode != "") {
                 // look up the code just extracted; the language note is empty in this branch
                 $this->language = $langConverter->getNameFromCode('iso_639_2B_code', $strLangCode);
             }
         }
     }
     ### format
     $this->format = $this->parseFormat($this->format_array);
     ### full-text
     // examine the 856s present in the record to see if they are in
     // fact to full-text, and not to a table of contents or something
     // stupid like that
     foreach ($this->datafield("856") as $link) {
         $resource_type = $link->ind2;
         $part = $link->subfield("3")->__toString();
         $strUrl = $link->subfield("u")->__toString();
         $strHostName = $link->subfield("a")->__toString();
         $strDisplay = $link->subfield("z")->__toString();
         $strLinkFormatType = $link->subfield("q")->__toString();
         $strLinkText = $link->subfield("y")->__toString();
         // display text: public note ($z), else link text ($y), else host name ($a)
         if ($strDisplay == "") {
             if ($strLinkText != "") {
                 $strDisplay = $strLinkText;
             } elseif ($strHostName != "") {
                 $strDisplay = $strHostName;
             }
         }
         if ($part != "") {
             $strDisplay = $part . " " . $strDisplay;
         }
         // no link supplied
         if ($link->subfield("u")->__toString() == "") {
             continue;
         }
         // link includes loc url (bad catalogers!)
         if (stristr($strUrl, "catdir") || $resource_type == 2) {
             array_push($this->links, array(null, $link->subfield("u")->__toString(), "none"));
         } else {
             $strLinkFormat = "online";
             if (stristr($strDisplay, "PDF") || stristr($strUrl, "PDF") || stristr($strLinkFormatType, "PDF") || stristr($strLinkText, "PDF")) {
                 $strLinkFormat = "pdf";
             } elseif (stristr($strDisplay, "HTML") || stristr($strLinkFormatType, "HTML") || stristr($strLinkText, "HTML")) {
                 $strLinkFormat = "html";
             }
             array_push($this->links, array($strDisplay, $strUrl, $strLinkFormat));
         }
     }
     ### oclc number
     // oclc number can be either in the 001 or in the 035$a
     // make sure 003 says 001 is oclc number or 001 includes an oclc prefix,
     $str001 = $this->controlfield("001")->__toString();
     $str003 = $this->controlfield("003")->__toString();
     $str035 = $this->datafield("035")->subfield("a")->__toString();
     if ($str001 != "" && ($str003 == "" && preg_match('/^\\(?([Oo][Cc])/', $str001) || $str003 == "OCoLC")) {
         $this->oclc_number = $str001;
     } elseif (strpos($str035, "OCoLC") !== false) {
         $this->oclc_number = $str035;
     }
     // get just the number
     $arrOclc = array();
     if (preg_match("/[0-9]{1,}/", $this->oclc_number, $arrOclc) != 0) {
         $strJustOclcNumber = $arrOclc[0];
         // strip out leading 0s
         $strJustOclcNumber = preg_replace("/^0{1,8}/", "", $strJustOclcNumber);
         $this->oclc_number = $strJustOclcNumber;
     }
     ### summary
     // abstract
     foreach ($arrAbstract as $strAbstract) {
         $this->abstract .= " " . $strAbstract;
     }
     $this->abstract = trim(strip_tags($this->abstract));
     // summary: abstract if there is one, else the toc, else the subjects joined by "; "
     if ($this->abstract != "") {
         $this->summary = $this->abstract;
         $this->summary_type = "abstract";
     } elseif ($this->toc != "") {
         $this->summary = $this->toc;
         $this->summary_type = "toc";
     } elseif (count($this->subjects) > 0) {
         $this->summary_type = "subjects";
         for ($x = 0; $x < count($this->subjects); $x++) {
             $subject_object = $this->subjects[$x];
             $this->summary .= $subject_object->value;
             if ($x < count($this->subjects) - 1) {
                 $this->summary .= "; ";
             }
         }
     }
     ### journal title
     // we'll take the journal title form the 773$t as the best option,
     if ($this->journal_title == "") {
         // otherwise see if context object has one
         if ($objJTitle != null) {
             $this->journal_title = $objJTitle->nodeValue;
         } elseif ($objTitle != null) {
             $this->journal_title = $objTitle->nodeValue;
         } elseif ($this->short_title != "" && ($this->format == "Article" || $this->format == "Journal" || $this->format == "Newspaper")) {
             $this->journal_title = $this->short_title;
         }
     }
     ### volume
     if ($this->volume == "") {
         if (array_key_exists("volume", $arrRegExJournal)) {
             $this->volume = $arrRegExJournal["volume"];
         }
     }
     ### issue
     if ($this->issue == "") {
         if (array_key_exists("issue", $arrRegExJournal)) {
             $this->issue = $arrRegExJournal["issue"];
         }
     }
     ### pages
     // start page
     if ($this->start_page == "") {
         if (array_key_exists("spage", $arrRegExJournal)) {
             $this->start_page = $arrRegExJournal["spage"];
         }
     }
     // end page
     if ($this->end_page == "") {
         if (array_key_exists("epage", $arrRegExJournal)) {
             // found an end page from our generic regular expression parser
             $this->end_page = $arrRegExJournal["epage"];
         } elseif ($strExtentHost != "" && $this->start_page != "") {
             // there is an extent note, indicating the number of pages,
             // calculate end page based on that
             $arrExtent = array();
             if (preg_match('/([0-9]{1})\\/([0-9]{1})/', $strExtentHost, $arrExtent) != 0) {
                 // if extent expressed as a fraction of a page, just take
                 // the start page as the end page
                 $this->end_page = $this->start_page;
             } elseif (preg_match("/[0-9]{1,}/", $strExtentHost, $arrExtent) != 0) {
                 // otherwise take whole number
                 $iStart = (int) $this->start_page;
                 $iEnd = (int) $arrExtent[0];
                 $this->end_page = $iStart + ($iEnd - 1);
             }
         }
     }
     // page normalization
     if ($this->end_page != "" && $this->start_page != "") {
         // pages were input as 197-8 or 197-82, or similar, so convert
         // the last number to the actual page number
         if (strlen($this->end_page) < strlen($this->start_page)) {
             $strMissing = substr($this->start_page, 0, strlen($this->start_page) - strlen($this->end_page));
             $this->end_page = $strMissing . $this->end_page;
         }
     }
     ### isbn
     // get just the isbn minus format notes
     for ($x = 0; $x < count($this->isbns); $x++) {
         $arrIsbnExtract = array();
         $this->isbns[$x] = str_replace("-", "", $this->isbns[$x]);
         // try the 13-digit form first, then the 10-digit form (either may end in X)
         if (preg_match("/[0-9]{12,13}X{0,1}/", $this->isbns[$x], $arrIsbnExtract) != 0) {
             $this->isbns[$x] = $arrIsbnExtract[0];
         } elseif (preg_match("/[0-9]{9,10}X{0,1}/", $this->isbns[$x], $arrIsbnExtract) != 0) {
             $this->isbns[$x] = $arrIsbnExtract[0];
         }
     }
     ### thesis
     // most 502 fields follow the following pattern, which we will use to
     // match and extract individual elements:
     // Thesis (M.F.A.)--University of California, San Diego, 2005
     // Thesis (Ph. D.)--Queen's University, Kingston, Ont., 1977.
     if ($strThesis != "") {
         // extract degree conferred
         $arrDegree = array();
         if (preg_match('/\\(([^\\(]*)\\)/', $strThesis, $arrDegree) != 0) {
             $this->degree = $arrDegree[1];
         }
         // extract institution
         $iInstPos = strpos($strThesis, "--");
         if ($iInstPos !== false) {
             $strInstitution = "";
             // get everything after the --
             $strInstitution = substr($strThesis, $iInstPos + 2, strlen($strThesis) - 1);
             // find last comma in remaining text
             $iEndPosition = strrpos($strInstitution, ",");
             if ($iEndPosition !== false) {
                 $strInstitution = substr($strInstitution, 0, $iEndPosition);
             }
             $this->institution = $strInstitution;
         }
         // extract year conferred (replaced below if the 260$c supplies a date)
         $this->year = $this->extractYear($strThesis);
     }
     ### title
     $this->non_sort = strip_tags($this->non_sort);
     $this->title = strip_tags($this->title);
     $this->sub_title = strip_tags($this->sub_title);
     // make sure subtitle is properly parsed out
     $iColon = strpos($this->title, ":");
     if ($this->sub_title == "" && $iColon !== false) {
         $this->sub_title = trim(substr($this->title, $iColon + 1));
         $this->title = trim(substr($this->title, 0, $iColon));
     }
     // make sure nonSort portion of the title is extracted
     // punctuation; we'll also *add* the definite/indefinite article below should
     // the quote be followed by one of those -- this is all in english, yo!
     if (strlen($this->title) > 0) {
         if (substr($this->title, 0, 1) == "\"" || substr($this->title, 0, 1) == "'") {
             $this->non_sort = substr($this->title, 0, 1);
             $this->title = substr($this->title, 1);
         }
     }
     // common definite and indefinite articles
     if (strlen($this->title) > 4) {
         if (Xerxes_Framework_Parser::strtolower(substr($this->title, 0, 4)) == "the ") {
             $this->non_sort .= substr($this->title, 0, 4);
             $this->title = substr($this->title, 4);
         } elseif (Xerxes_Framework_Parser::strtolower(substr($this->title, 0, 2)) == "a ") {
             $this->non_sort .= substr($this->title, 0, 2);
             $this->title = substr($this->title, 2);
         } elseif (Xerxes_Framework_Parser::strtolower(substr($this->title, 0, 3)) == "an ") {
             $this->non_sort .= substr($this->title, 0, 3);
             $this->title = substr($this->title, 3);
         }
     }
     ### year
     if ($strDate != "") {
         $this->year = $this->extractYear($strDate);
     } elseif ($this->extractYear($this->publisher)) {
         // off chance that the date is hanging out in the publisher field;
         // might as well strip it out here as well
         $this->year = $this->extractYear($this->publisher);
         $this->publisher = str_replace($this->year, "", $this->publisher);
     } elseif ($this->extractYear($this->journal)) {
         // perhaps somewhere in the 773$g
         $this->year = $this->extractYear($this->journal);
     }
     // last chance grab from context object
     if ($this->year == "" && $objDate != null) {
         $this->year = $this->extractYear($objDate->nodeValue);
     }
     #### authors
     // authors
     $this->author_from_title = (string) $this->datafield("245")->subfield("c")->__toString();
     $objConfName = $this->datafield("111");
     // "anc"
     $objAddAuthor = $this->datafield("700");
     // "a"
     $objAddCorp = $this->datafield("710");
     //, "ab"
     $objAddConf = $this->datafield("711");
     // "acn"
     // conference and corporate names from title ?
     $objConferenceTitle = $this->datafield("811");
     // all
     if ($objAddConf->length() == 0 && $objConferenceTitle->length() > 0) {
         $objAddConf = $objConferenceTitle;
     }
     $objCorporateTitle = $this->datafield("810");
     // all
     if ($objAddCorp->length() == 0 && $objCorporateTitle->length() > 0) {
         $objAddCorp = $objCorporateTitle;
     }
     if ($objConfName->length() > 0 || $objAddConf->length() > 0) {
         array_push($this->format_array, "conference paper");
     }
     // personal primary author
     if ($this->datafield("100")->length() > 0) {
         $objXerxesAuthor = $this->splitAuthor($this->datafield("100"), "a", "personal");
         array_push($this->authors, $objXerxesAuthor);
     } elseif ($objAddAuthor->length() > 0) {
         // editor: no 100, so the first 700 stands in as the primary name
         $objXerxesAuthor = $this->splitAuthor($objAddAuthor->item(0), "a", "personal", true);
         array_push($this->authors, $objXerxesAuthor);
         $this->editor = true;
     }
     // additional personal authors
     if ($objAddAuthor->length() > 0) {
         // if there is an editor it has already been included in the array
         // so we need to skip the first author in the list
         // NOTE(review): if the field list is a standard Iterator, the foreach below
         // rewinds it and this next() has no effect; the de-duping pass further down
         // would then be what removes the repeated editor -- confirm
         if ($this->editor == true) {
             $objAddAuthor->next();
         }
         foreach ($objAddAuthor as $obj700) {
             $objXerxesAuthor = $this->splitAuthor($obj700, "a", "personal", true);
             array_push($this->authors, $objXerxesAuthor);
         }
     }
     // corporate author
     if ($this->datafield("110")->subfield("ab")->__toString() != "") {
         $objXerxesAuthor = $this->splitAuthor($this->datafield("110"), "ab", "corporate");
         array_push($this->authors, $objXerxesAuthor);
     }
     // additional corporate authors
     if ($objAddCorp->length() > 0) {
         foreach ($objAddCorp as $objCorp) {
             $objXerxesAuthor = $this->splitAuthor($objCorp, "ab", "corporate", true);
             array_push($this->authors, $objXerxesAuthor);
         }
     }
     // conference name
     if ($objConfName->length() > 0) {
         $objXerxesAuthor = $this->splitAuthor($objConfName, "anc", "conference");
         array_push($this->authors, $objXerxesAuthor);
     }
     // additional conference names
     if ($objAddConf->length() > 0) {
         foreach ($objAddConf as $objConf) {
             $objXerxesAuthor = $this->splitAuthor($objConf, "acn", "conference", true);
             array_push($this->authors, $objXerxesAuthor);
         }
     }
     // last-chance from context-object
     if (count($this->authors) == 0 && $objAuthors != null) {
         foreach ($objAuthors as $objAuthor) {
             $objXerxesAuthor = new Xerxes_Record_Author();
             foreach ($objAuthor->childNodes as $objAuthAttr) {
                 switch ($objAuthAttr->localName) {
                     case "aulast":
                         $objXerxesAuthor->last_name = $objAuthAttr->nodeValue;
                         $objXerxesAuthor->type = "personal";
                         break;
                     case "aufirst":
                         $objXerxesAuthor->first_name = $objAuthAttr->nodeValue;
                         break;
                     case "auinit":
                         $objXerxesAuthor->init = $objAuthAttr->nodeValue;
                         break;
                     case "aucorp":
                         $objXerxesAuthor->name = $objAuthAttr->nodeValue;
                         $objXerxesAuthor->type = "corporate";
                         break;
                 }
             }
             array_push($this->authors, $objXerxesAuthor);
         }
     }
     // construct a readable journal field if none supplied
     if ($this->journal == "") {
         if ($this->journal_title != "") {
             $this->journal = $this->toTitleCase($this->journal_title);
             if ($this->volume != "") {
                 $this->journal .= " vol. " . $this->volume;
             }
             if ($this->issue != "") {
                 $this->journal .= " iss. " . $this->issue;
             }
             if ($this->year != "") {
                 $this->journal .= " (" . $this->year . ")";
             }
         }
     }
     ## de-duping
     // make sure no dupes in author array: compare every author against every other
     // and null out the later copy; entries already nulled are skipped as the outer
     // loop reaches them, so the first occurrence survives
     $author_original = $this->authors;
     $author_other = $this->authors;
     for ($x = 0; $x < count($author_original); $x++) {
         $objXerxesAuthor = $author_original[$x];
         if ($objXerxesAuthor instanceof Xerxes_Record_Author) {
             $this_author = $objXerxesAuthor->allFields();
             for ($a = 0; $a < count($author_other); $a++) {
                 if ($a != $x) {
                     $objThatAuthor = $author_other[$a];
                     if ($objThatAuthor instanceof Xerxes_Record_Author) {
                         $that_author = $objThatAuthor->allFields();
                         if ($this_author == $that_author) {
                             // remove the dupe
                             $author_original[$a] = null;
                         }
                     }
                 }
             }
         }
     }
     $this->authors = array();
     // reset author array
     foreach ($author_original as $author) {
         if ($author instanceof Xerxes_Record_Author) {
             array_push($this->authors, $author);
         }
     }
     // make sure no dupes and no blanks in standard numbers
     $arrISSN = $this->issns;
     $arrISBN = $this->isbns;
     $this->issns = array();
     $this->isbns = array();
     foreach ($arrISSN as $strISSN) {
         $strISSN = trim($strISSN);
         if ($strISSN != "") {
             $strISSN = str_replace("-", "", $strISSN);
             //extract the issn number leaving behind extra chars and comments
             $match = array();
             if (preg_match("/[0-9]{8,8}/", $strISSN, $match)) {
                 $strISSN = $match[0];
             }
             array_push($this->issns, $strISSN);
         }
     }
     foreach ($arrISBN as $strISBN) {
         $strISBN = trim($strISBN);
         if ($strISBN != "") {
             $strISBN = str_replace("-", "", $strISBN);
             array_push($this->isbns, $strISBN);
         }
     }
     $this->issns = array_unique($this->issns);
     $this->isbns = array_unique($this->isbns);
     ### punctuation clean-up
     $this->book_title = $this->stripEndPunctuation($this->book_title, "./;,:");
     $this->title = $this->stripEndPunctuation($this->title, "./;,:");
     $this->sub_title = $this->stripEndPunctuation($this->sub_title, "./;,:");
     $this->short_title = $this->stripEndPunctuation($this->short_title, "./;,:");
     $this->journal_title = $this->stripEndPunctuation($this->journal_title, "./;,:");
     $this->series_title = $this->stripEndPunctuation($this->series_title, "./;,:");
     $this->technology = $this->stripEndPunctuation($this->technology, "./;,:");
     $this->place = $this->stripEndPunctuation($this->place, "./;,:");
     $this->publisher = $this->stripEndPunctuation($this->publisher, "./;,:");
     $this->edition = $this->stripEndPunctuation($this->edition, "./;,:");
     for ($x = 0; $x < count($this->authors); $x++) {
         // NOTE(review): the outer foreach re-runs the whole inner clean-up once per
         // author property; it looks redundant but is kept in case stripEndPunctuation()
         // is not idempotent -- confirm before simplifying
         foreach ($this->authors[$x] as $key => $value) {
             $objXerxesAuthor = $this->authors[$x];
             foreach ($objXerxesAuthor as $key => $value) {
                 $objXerxesAuthor->{$key} = $this->stripEndPunctuation($value, "./;,:");
             }
             $this->authors[$x] = $objXerxesAuthor;
         }
     }
     for ($s = 0; $s < count($this->subjects); $s++) {
         $subject_object = $this->subjects[$s];
         $subject_object->value = $this->stripEndPunctuation($subject_object->value, "./;,:");
         $this->subjects[$s] = $subject_object;
     }
 }