Esempio n. 1
0
 /**
  * Builds the test suite: gives it a name, registers the record test-case
  * classes, and calls init() on the Xerxes_Framework_Languages instance.
  */
 public function __construct()
 {
     $this->setName('testsSuite');

     // register each test-case class with this suite, in the listed order
     foreach (array('Xerxes_MetalibRecordTest', 'Xerxes_Record_DocumentTest') as $test_class) {
         $this->addTestSuite($test_class);
     }

     // load language file
     Xerxes_Framework_Languages::getInstance()->init();
 }
Esempio n. 2
0
 /**
  * Front-controller entry point: bootstraps the framework objects, starts the
  * session, runs the command classes mapped to the request, and renders the view.
  *
  * Flow, in order:
  *  1. work out the application root (two directories above this file) and
  *     register the framework classes;
  *  2. initialize Registry, ControllerMap, Request, Page, Error and Languages;
  *  3. name and start the session;
  *  4. inside a try/catch: apply defaults, compute server/base URLs, enforce
  *     access control, execute each command, set queued cookies, honor any
  *     redirect, then emit the response (raw xml, php view, or xslt view).
  *
  * Any Exception thrown in step 4 is passed to Xerxes_Framework_Error::handle().
  * A redirect ends the script with exit; a request with no view returns early.
  *
  * @return void
  */
 public static function execute()
 {
     // calculate current file, this directory
     $this_directory = dirname(__FILE__);
     // calculate root directory of the app ../../ from here
     $path_to_parent = $this_directory;
     $path_to_parent = str_replace("\\", "/", $path_to_parent);
     $arrPath = explode("/", $path_to_parent);
     array_pop($arrPath);
     array_pop($arrPath);
     $path_to_parent = implode("/", $arrPath);
     // here so other framework files can reference it
     self::$parent_directory = $path_to_parent;
     // register framework and any main xerxes class files
     self::registerClasses("{$path_to_parent}/lib/framework", "Xerxes_Framework");
     // initialize the configuration setting (Registry),
     // command-view mapping (ControllerMap), and
     // language translation (Languages) objects
     $objRegistry = Xerxes_Framework_Registry::getInstance();
     $objRegistry->init();
     $objControllerMap = Xerxes_Framework_ControllerMap::getInstance();
     $objControllerMap->init();
     // set the version number, for interface or other places
     $objRegistry->setConfig("XERXES_VERSION", $objControllerMap->getVersion(), true);
     // dynamically set the web path, if config says so,
     // doesn't work on all webserver/php set-ups, so an
     // explicit web path from config is preferred
     if ($objRegistry->getConfig("base_web_path", false) == '{dynamic}') {
         if (isset($_SERVER)) {
             $script_name = $_SERVER['SCRIPT_NAME'];
             $script_name = str_replace("/index.php", "", $script_name);
             $objRegistry->setConfig("base_web_path", $script_name);
         }
     }
     // give our session a name to keep sessions distinct between multiple
     // instances of xerxes on one server.  use base_path (preferably) or
     // application_name config directives.
     $path_base = $objRegistry->getConfig("base_web_path", false);
     // the session name is derived from the path before the "/" fallback below,
     // so an empty base path yields the bare "xerxessession_" name
     $path_key = preg_replace('/\\W/', '_', $path_base);
     $session_name = "xerxessession_" . $path_key;
     if ($path_base == "") {
         $path_base = "/";
     }
     $session_path = $objRegistry->getConfig("session_path", false, $path_base);
     $session_domain = $objRegistry->getConfig("session_domain", false, null);
     session_name($session_name);
     session_set_cookie_params(0, $session_path, $session_domain);
     session_start();
     // processes the incoming request
     $objRequest = Xerxes_Framework_Request::getInstance();
     $objRequest->init();
     // utility classes
     // assists with basic paging/navigation elements for the view
     $objPage = new Xerxes_Framework_Page($objRequest, $objRegistry);
     // functions for special logging or handling of errors
     $objError = new Xerxes_Framework_Error();
     // language names
     $objLanguage = Xerxes_Framework_Languages::getInstance();
     $objLanguage->init();
     // we'll put the remaining code in a try-catch block in order to show friendly error page
     // for any uncaught exceptions
     try {
         ####################
         #  DISPLAY ERRORS  #
         ####################
         if ($objRegistry->getConfig("DISPLAY_ERRORS") == true) {
             error_reporting(E_ALL);
             ini_set('display_errors', '1');
         }
         ####################
         #   DEFAULTS       #
         ####################
         // labels
         $objLabels = Xerxes_Framework_Labels::getInstance();
         $lang = $objRequest->getProperty("lang");
         $objLabels->init($lang);
         // make sure application_name is passthrough, and has a value.
         $objRegistry->setConfig("application_name", $objRegistry->getConfig("APPLICATION_NAME", false, "Xerxes", $lang), true);
         ####################
         #     SET PATHS    #
         ####################
         ### reverse proxy
         // check to see if xerxes is running behind a reverse proxy and swap
         // host and remote ip here with their http_x_forwarded counterparts;
         // but only if configured for this, since client can spoof the header
         // if xerxes is not, in fact, behind a reverse proxy
         if ($objRegistry->getConfig("REVERSE_PROXY", false, false) == true) {
             $forward_host = $objRequest->getServer('HTTP_X_FORWARDED_HOST');
             $forward_address = $objRequest->getServer('HTTP_X_FORWARDED_FOR');
             if ($forward_host != "") {
                 $objRequest->setServer('SERVER_NAME', $forward_host);
             }
             // last ip address is the user's
             if ($forward_address != "") {
                 $arrIP = explode(",", $forward_address);
                 $objRequest->setServer('REMOTE_ADDR', trim(array_pop($arrIP)));
             }
         }
         // the working directory is the instance, so any relative paths will
         // be executed in relation to the root directory of the instance
         $working_dir = getcwd();
         $working_dir = str_replace("\\", "/", $working_dir);
         // full web path
         //
         // NOTE: if you change this code make sure you make a corresponding
         // change in lib/framework/Error.php, since there is redundant code
         // there in case something goes horribly wrong and we need to set the
         // web path for proper display of a (friendly) error page
         $base_path = $objRegistry->getConfig('BASE_WEB_PATH', false, "");
         $this_server_name = $objRequest->getServer('SERVER_NAME');
         // check for a non-standard port
         $port = $objRequest->getServer('SERVER_PORT');
         if ($port == 80 || $port == 443) {
             $port = "";
         } else {
             $port = ":" . $port;
         }
         $protocol = "http://";
         if ($objRequest->getServer("HTTPS")) {
             $protocol = "https://";
         }
         $web = $protocol . $this_server_name . $port;
         // register these values
         $objRegistry->setConfig("SERVER_URL", $web);
         $objRegistry->setConfig("PATH_PARENT_DIRECTORY", $path_to_parent);
         $objRegistry->setConfig("APP_DIRECTORY", $working_dir);
         $objRegistry->setConfig("BASE_URL", $web . $base_path, true);
         ####################
         #   INSTRUCTIONS   #
         ####################
         // ControllerMap contains instructions for commands and views
         // based on the url parameters 'base' and 'action'
         $strBase = $objRequest->getProperty("base");
         $strAction = $objRequest->getProperty("action");
         $objControllerMap->setAction($strBase, $strAction, $objRequest);
         ####################
         #  ACCESS CONTROL  #
         ####################
         // if this part of the application is restricted to a local ip range, or requires a named login, then the
         // Restrict class will check the user's ip address or if they have logged in; failure stops the flow
         // and redirects user to a login page with the current request passed as 'return' parameter in the url
         $objRestrict = new Xerxes_Framework_Restrict($objRequest);
         // command line scripts will ignore access rules
         if ($objRequest->isCommandLine() != true) {
             if ($objControllerMap->isRestricted() == true) {
                 if ($objControllerMap->requiresLogin() == true) {
                     // resource requires a valid named username
                     $objRestrict->checkLogin();
                 } else {
                     // resource is restricted, but local ip range is okay
                     $objRestrict->checkIP();
                 }
             } else {
                 // go ahead and register local users, but don't prompt for login
                 $objRestrict->checkIP(false);
             }
         }
         // if this action is set to only be run via the command line, in order to prevent
         // web execution of potentially long-running tasks, then restrict it here
         if (!$objRequest->isCommandLine() && $objControllerMap->restrictToCLI()) {
             throw new Exception("cannot run command from web");
         }
         ####################
         #     INCLUDES     #
         ####################
         // files and directories that have been set to be included by the config file
         foreach ($objControllerMap->getIncludes() as $path_to_include) {
             self::registerClasses($path_to_parent . "/{$path_to_include}");
         }
         ####################
         #       DATA       #
         ####################
         // set-up the data by defining the root element
         $strDocumentElement = $objControllerMap->getDocumentElement();
         $objRequest->setDocumentElement($strDocumentElement);
         // pass config values that should be made available to the XSLT
         $objRequest->addDocument($objRegistry->publicXML());
         // the data will be built-up by calling one or more command classes
         // which will fetch their data based on other parameters supplied in
         // the request; returning that data as xml to a master xml dom document
         // inside the Xerxes_Framework_Request class, or in some cases specifying
         // a url to redirect the user out
         $commands = $objControllerMap->getCommands();
         foreach ($commands as $arrCommand) {
             // directory where the command class is located
             $strDirectory = $arrCommand[0];
             // prefix namespace of the command class
             $strNamespace = $arrCommand[1];
             // suffix name of the command class
             $strClassFile = $arrCommand[2];
             // directory where commands live
             $command_path = "{$path_to_parent}/commands/{$strDirectory}";
             // allow for a local override, even; this path is relative, so it
             // resolves against the current working directory (the instance)
             $local_command_path = "commands/{$strDirectory}";
             // first, include any parent class, assuming that the parent class will
             // follow the naming convention of having the same name as the directory
             $strParentClass = Xerxes_Framework_Parser::strtoupper(substr($strDirectory, 0, 1)) . substr($strDirectory, 1);
             if (file_exists("{$local_command_path}/{$strParentClass}.php")) {
                 require_once "{$local_command_path}/{$strParentClass}.php";
             } elseif (file_exists("{$command_path}/{$strParentClass}.php")) {
                 require_once "{$command_path}/{$strParentClass}.php";
             }
             // if the specified command class exists in the distro or local commands folder, then
             // instantiate an object and execute it
             $strClass = $strNamespace . "_Command_" . $strClassFile;
             $local_command = file_exists("{$local_command_path}/{$strClassFile}.php");
             if (file_exists("{$command_path}/{$strClassFile}.php") || $local_command) {
                 // if the instance has a local version, take it!
                 if ($local_command) {
                     require_once "{$local_command_path}/{$strClassFile}.php";
                 } else {
                     require_once "{$command_path}/{$strClassFile}.php";
                 }
                 // instantiate the command class and execute it, but only
                 // if it extends xerxes_framework_command
                 $objCommand = new $strClass();
                 if ($objCommand instanceof Xerxes_Framework_Command) {
                     $objCommand->execute($objRequest, $objRegistry);
                 } else {
                     throw new Exception("command classes must be instance of Xerxes_Framework_Command");
                 }
             } else {
                 // if no command but a view was specified, then go ahead and show the view
                 // minus any data, since the view is doin' its own thang
                 if (!file_exists($objControllerMap->getView())) {
                     throw new Exception("invalid command {$strClass}");
                 }
             }
         }
         ####################
         #     COOKIES      #
         ####################
         // any cookies specified in the request object? if so, set em now.
         // uses PHP's built-in setcookie(); the six entries are passed positionally
         // NOTE(review): assumes each entry is ordered name, value, expire, path,
         // domain, secure to match setcookie()'s signature -- confirm against
         // Xerxes_Framework_Request::cookieSetParams()
         $cookieSetParams = $objRequest->cookieSetParams();
         foreach ($cookieSetParams as $cookieParams) {
             setcookie($cookieParams[0], $cookieParams[1], $cookieParams[2], $cookieParams[3], $cookieParams[4], $cookieParams[5]);
         }
         ####################
         #     REDIRECT     #
         ####################
         // if the result of the command is a redirect, we will stop the
         // flow and redirect the user out, unless overridden by the noRedirect
         // directive
         if ($objRequest->getRedirect() != null) {
             if ($objRequest->getProperty("noRedirect") == null) {
                 header("Location: " . $objRequest->getRedirect());
                 exit;
             } else {
                 // include in the response what the redirect would have been
                 $objRequest->setProperty("redirect", $objRequest->getRedirect());
             }
         }
         ####################
         #       VIEW       #
         ####################
         // SET THE HTTP HEADER
         //
         // we'll set the content-type, and potentially other header elements, based on the parameter 'format';
         // format must correspond to one of the pre-defined format content-types in setHeader() or can be a user-
         // defined format set in action.xml
         $format = $objRequest->getProperty("format");
         if ($objControllerMap->getFormat($format) != null) {
             header($objControllerMap->getFormat($format));
         } else {
             self::setHeader($format);
         }
         // get the xml from the request object, but exclude any server information
         // from being included if format=xerxes
         $bolShowServer = true;
         if ($format == "xerxes") {
             $bolShowServer = false;
         }
         $objXml = $objRequest->toXML($bolShowServer);
         // RAW XML DISPLAY
         //
         // you can append 'format=xerxes' to the querystring to have this controller spit back
         // the response in plain xml, which can be useful in some cases, like maybe AJAX?
         if ($format == "xerxes") {
             echo $objXml->saveXML();
         } else {
             // VIEW CODE
             //
             // ControllerMap contains instructions on what file to include for the view; typically
             // this will be an xslt file, but could be a php file if the xslt does not
             // provide enough flexibility; php page will inherit the xml dom document and
             // can go from there
             if ($objControllerMap->getView() == "") {
                 // No view specified, no view will be executed.
                 // (this early return also skips the flash-message reset below)
                 return;
             }
             // PHP CODE
             if ($objControllerMap->getViewType() != "xsl" && $objControllerMap->getViewType() != null) {
                 $file = $objControllerMap->getView();
                 $distro_file = $objRegistry->getConfig("PATH_PARENT_DIRECTORY", true) . "/lib/{$file}";
                 // a view file in the instance takes precedence over the distro copy
                 if (file_exists($file)) {
                     require_once $file;
                 } elseif (file_exists($distro_file)) {
                     require_once $distro_file;
                 } else {
                     throw new Exception("Could not find non-xsl view specified to include: {$file}");
                 }
             } else {
                 // XSLT CODE
                 $output = $objPage->transform($objXml, $objControllerMap->getView(), null);
                 // EMBEDDED JAVASCRIPT DISPLAY
                 //
                 // you can append 'format=embed_html_js' to the querystring to output
                 // the content as a javascript source document with everything wrapped in
                 // document.write() statements
                 if ($format == "embed_html_js") {
                     // first escape any single quotes
                     $output = str_replace("'", "\\'", $output);
                     // now break the html into lines and output with document.write('')
                     $lines = explode("\n", $output);
                     $new_lines = array("// Javascript output. ");
                     foreach ($lines as $line) {
                         array_push($new_lines, "document.write('" . $line . "');");
                     }
                     $output = implode("\n", $new_lines);
                 }
                 echo $output;
             }
             // remove the flash message, intended for one display only.
             $objRequest->setSession("flash_message", null);
         }
     } catch (Exception $e) {
         $objError->handle($e, $objRequest, $objRegistry);
     }
 }
Esempio n. 3
0
 /**
  * Maps the marc data to the object's properties
  */
 protected function map()
 {
     ## openurl
     // the source can contain an openurl context object buried in it as well as marc-xml
     // test to see what profile the context object is using; set namespace accordingly
     if ($this->document->getElementsByTagNameNS("info:ofi/fmt:xml:xsd:book", "book")->item(0) != null) {
         $this->xpath->registerNamespace("rft", "info:ofi/fmt:xml:xsd:book");
     } elseif ($this->document->getElementsByTagNameNS("info:ofi/fmt:xml:xsd:dissertation", "dissertation")->item(0) != null) {
         $this->xpath->registerNamespace("rft", "info:ofi/fmt:xml:xsd:dissertation");
     } elseif ($this->document->getElementsByTagNameNS("info:ofi/fmt:xml:xsd", "journal")->item(0) != null) {
         $this->xpath->registerNamespace("rft", "info:ofi/fmt:xml:xsd");
     } else {
         $this->xpath->registerNamespace("rft", "info:ofi/fmt:xml:xsd:journal");
     }
     // context object:
     // these just in case
     $objATitle = $this->xpath->query("//rft:atitle")->item(0);
     $objBTitle = $this->xpath->query("//rft:atitle")->item(0);
     $objAuthors = $this->xpath->query("//rft:author[rft:aulast != '' or rft:aucorp != '']");
     $objGenre = $this->xpath->query("//rft:genre")->item(0);
     $objDate = $this->xpath->query("//rft:date")->item(0);
     // journal title, volume, issue, pages from context object
     $objTitle = $this->xpath->query("//rft:title")->item(0);
     $objSTitle = $this->xpath->query("//rft:stitle")->item(0);
     $objJTitle = $this->xpath->query("//rft:jtitle")->item(0);
     $objVolume = $this->xpath->query("//rft:volume")->item(0);
     $objIssue = $this->xpath->query("//rft:issue")->item(0);
     $objStartPage = $this->xpath->query("//rft:spage")->item(0);
     $objEndPage = $this->xpath->query("//rft:epage")->item(0);
     $objISSN = $this->xpath->query("//rft:issn")->item(0);
     $objISBN = $this->xpath->query("//rft:isbn")->item(0);
     if ($objSTitle != null) {
         $this->short_title = $objSTitle->nodeValue;
     }
     if ($objVolume != null) {
         $this->volume = $objVolume->nodeValue;
     }
     if ($objIssue != null) {
         $this->issue = $objIssue->nodeValue;
     }
     if ($objStartPage != null) {
         $this->start_page = $objStartPage->nodeValue;
     }
     if ($objEndPage != null) {
         $this->end_page = $objEndPage->nodeValue;
     }
     if ($objISBN != null) {
         array_push($this->isbns, $objISBN->nodeValue);
     }
     if ($objISSN != null) {
         array_push($this->issns, $objISSN->nodeValue);
     }
     if ($objGenre != null) {
         array_push($this->format_array, $objGenre->nodeValue);
     }
     // control and standard numbers
     $this->control_number = $this->controlfield("001")->__toString();
     $this->record_id = $this->control_number;
     $arrIssn = $this->fieldArray("022", "a");
     $arrIsbn = $this->fieldArray("020", "az");
     $this->govdoc_number = $this->datafield("086")->subfield("a")->__toString();
     $this->gpo_number = $this->datafield("074")->subfield("a")->__toString();
     // doi
     // this is kind of iffy since the 024 is not _really_ a DOI field; but this
     // is the most likely marc field; however need to see if the number follows the very loose
     // pattern of the DOI of 'prefix/suffix', where prefix and suffix can be nearly anything
     $field_024 = $this->fieldArray("024", "a");
     foreach ($field_024 as $doi) {
         // strip any doi: prefix
         $doi = str_ireplace("doi:", "", $doi);
         $doi = str_ireplace("doi", "", $doi);
         // got it!
         if (preg_match('/.*\\/.*/', $doi)) {
             $this->doi = $doi;
             break;
         }
     }
     $strJournalIssn = $this->datafield("773")->subfield("x")->__toString();
     if ($strJournalIssn != null) {
         array_push($arrIssn, $strJournalIssn);
     }
     // call number
     $strCallNumber = $this->datafield("050")->__toString();
     $strCallNumberLocal = $this->datafield("090")->__toString();
     if ($strCallNumber != null) {
         $this->call_number = $strCallNumber;
     } elseif ($strCallNumberLocal != null) {
         $this->call_number = $strCallNumberLocal;
     }
     // format
     $this->technology = $this->datafield("538")->subfield("a")->__toString();
     $arrFormat = $this->fieldArray("513", "a");
     foreach ($arrFormat as $format) {
         array_push($this->format_array, $format);
     }
     $strTitleFormat = $this->datafield("245")->subfield("k")->__toString();
     if ($strTitleFormat != null) {
         array_push($this->format_array, $strTitleFormat);
     }
     // thesis degree, institution, date awarded
     $strThesis = $this->datafield("502")->subfield("a")->__toString();
     ### title
     $this->title = $this->datafield("245")->subfield("an")->__toString();
     $this->sub_title = $this->datafield("245")->subfield("b")->__toString();
     $this->series_title = $this->datafield("440")->subfield("a")->__toString();
     $this->uniform_title = $this->datafield("130|240")->__toString();
     // sometimes title is in subfield p
     $title_part = $this->datafield("245")->subfield("p")->__toString();
     if ($this->title == "" && $title_part != "") {
         $this->title = $title_part;
     }
     // sometimes the title appears in a 242 or even a 246 if it is translated from another
     // language, although the latter is probably bad practice.  We will only take these
     // if the title in the 245 is blank, and take a 242 over the 246
     $strTransTitle = $this->datafield("242")->subfield("a")->__toString();
     $strTransSubTitle = $this->datafield("242")->subfield("b")->__toString();
     $strVaryingTitle = $this->datafield("246")->subfield("a")->__toString();
     $strVaryingSubTitle = $this->datafield("246")->subfield("b")->__toString();
     if ($this->title == "" && $strTransTitle != "") {
         $this->title = $strTransTitle;
         $this->trans_title = true;
     } elseif ($this->title == "" && $strVaryingTitle != "") {
         $this->title = $strVaryingTitle;
         $this->trans_title = true;
     }
     if ($this->sub_title == "" && $strTransSubTitle != "") {
         $this->sub_title = $strTransTitle;
         $this->trans_title = true;
     } elseif ($this->sub_title == "" && $strVaryingSubTitle != "") {
         $this->sub_title = $strVaryingSubTitle;
         $this->trans_title = true;
     }
     // last chance, check the context object
     if ($this->title == "" && $objATitle != null) {
         $this->title = $objATitle->nodeValue;
     } elseif ($this->title == "" && $objBTitle != null) {
         $this->title = $objBTitle->nodeValue;
     }
     // additional titles for display
     foreach ($this->datafield('730|740') as $additional_titles) {
         $subfields = $additional_titles->subfield()->__toString();
         array_push($this->additional_titles, $subfields);
     }
     // edition, extent, description
     $this->edition = $this->datafield("250")->subfield("a")->__toString();
     $this->extent = $this->datafield("300")->subfield("a")->__toString();
     $this->description = $this->datafield("300")->__toString();
     $this->price = $this->datafield("365")->__toString();
     // publisher
     $this->place = $this->datafield("260")->subfield("a")->__toString();
     $this->publisher = $this->datafield("260")->subfield("b")->__toString();
     // date
     $strDate = $this->datafield("260")->subfield("c")->__toString();
     // notes
     $arrToc = $this->fieldArray("505", "agrt");
     foreach ($arrToc as $toc) {
         $this->toc .= $toc;
     }
     $arrAbstract = $this->fieldArray("520", "a");
     $strLanguageNote = $this->datafield("546")->subfield("a")->__toString();
     // other notes
     $objNotes = $this->xpath("//marc:datafield[@tag >= 500 and @tag < 600 and @tag != 505 and @tag != 520 and @tag != 546]");
     foreach ($objNotes as $objNote) {
         array_push($this->notes, $objNote->nodeValue);
     }
     // subjects
     // we'll exclude the numeric subfields since they contain information about the
     // source of the subject terms, which are probably not needed for display?
     foreach ($this->datafield("6XX") as $subject) {
         $subfields = $subject->subfield("abcdefghijklmnopqrstuvwxyz");
         $subfields_array = array();
         foreach ($subfields as $subfield) {
             array_push($subfields_array, $subfield->__toString());
         }
         $subject_object = new Xerxes_Record_Subject();
         $subject_object->display = implode(" -- ", $subfields_array);
         $subject_object->value = $subfields->__toString();
         array_push($this->subjects, $subject_object);
     }
     // series information
     foreach ($this->datafield('4XX|800|810|811|830') as $subject) {
         array_push($this->series, $subject->__toString());
     }
     // journal
     // specify the order of the subfields in 773 for journal as $a $t $g and then everything else
     //  in case they are out of order
     $this->journal = $this->datafield("773")->subfield("atgbcdefhijklmnopqrsuvwxyz1234567890", true)->__toString();
     $strJournal = $this->datafield("773")->subfield("agpqt")->__toString();
     $this->journal_title = $this->datafield("773")->subfield("t")->__toString();
     $this->short_title = $this->datafield("773")->subfield("p")->__toString();
     $strExtentHost = $this->datafield("773")->subfield("h")->__toString();
     // alternate character-scripts
     // the 880 represents an alternative character-script, like Hebrew or CJK;
     // for simplicity's sake, we just dump them all here in an array, with the
     // intent of displaying them in paragraphs together in the interface or something?
     // we get every field except for the $6 which is a linking field
     $this->alt_scripts = $this->fieldArray("880", "abcdefghijklmnopqrstuvwxyz12345789");
     // now use the $6 to figure out which character-script this is
     // assume just one for now
     $strAltScript = $this->datafield("880")->subfield("6")->__toString();
     if ($strAltScript != null) {
         $arrMatchCodes = array();
         $arrScriptCodes = array("(3" => "Arabic", "(B" => "Latin", '$1' => "CJK", "(N" => "Cyrillic", "(S" => "Greek", "(2" => "Hebrew");
         if (preg_match('/[0-9]{3}-[0-9]{2}\\/([^\\/]*)/', $strAltScript, $arrMatchCodes)) {
             if (array_key_exists($arrMatchCodes[1], $arrScriptCodes)) {
                 $this->alt_script_name = $arrScriptCodes[$arrMatchCodes[1]];
             }
         }
     }
     ### volume, issue, pagination
     // a best guess extraction of volume, issue, pages from 773
     $arrRegExJournal = $this->parseJournalData($strJournal);
     // some sources include ^ as a filler character in issn/isbn, these people should be shot!
     foreach ($arrIssn as $strIssn) {
         if (strpos($strIssn, "^") === false) {
             array_push($this->issns, $strIssn);
         }
     }
     foreach ($arrIsbn as $strIsbn) {
         if (strpos($strIsbn, "^") === false) {
             array_push($this->isbns, $strIsbn);
         }
     }
     ### language
     $langConverter = Xerxes_Framework_Languages::getInstance();
     // take an explicit language note over 008 if available
     if ($strLanguageNote != null) {
         $strLanguageNote = $this->stripEndPunctuation($strLanguageNote, ".");
         if (strlen($strLanguageNote) == 2) {
             $this->language = $langConverter->getNameFromCode('iso_639_1_code', $strLanguageNote);
         } elseif (strlen($strLanguageNote) == 3) {
             $this->language = $langConverter->getNameFromCode('iso_639_2B_code', $strLanguageNote);
         } elseif (!stristr($strLanguageNote, "Undetermined")) {
             $this->language = str_ireplace("In ", "", $strLanguageNote);
             $language = $langConverter->getNameFromCode('name', ucfirst($this->language));
             if ($language != null) {
                 $this->language = $language;
             }
         }
     } else {
         // get the language code from the 008
         $objLang = $this->controlfield("008")->__toString();
         if ($objLang instanceof Xerxes_Marc_ControlField) {
             $strLangCode = $objLang->position("35-37");
             if ($strLangCode != "") {
                 $this->language = $langConverter->getNameFromCode('iso_639_2B_code', $strLanguageNote);
             }
         }
     }
     ### format
     $this->format = $this->parseFormat($this->format_array);
     ### full-text
     // examine the 856s present in the record to see if they are in
     // fact to full-text, and not to a table of contents or something
     // stupid like that
     foreach ($this->datafield("856") as $link) {
         $resource_type = $link->ind2;
         $part = $link->subfield("3")->__toString();
         $strUrl = $link->subfield("u")->__toString();
         $strHostName = $link->subfield("a")->__toString();
         $strDisplay = $link->subfield("z")->__toString();
         $strLinkFormatType = $link->subfield("q")->__toString();
         $strLinkText = $link->subfield("y")->__toString();
         if ($strDisplay == "") {
             if ($strLinkText != "") {
                 $strDisplay = $strLinkText;
             } elseif ($strHostName != "") {
                 $strDisplay = $strHostName;
             }
         }
         if ($part != "") {
             $strDisplay = $part . " " . $strDisplay;
         }
         // no link supplied
         if ($link->subfield("u")->__toString() == "") {
             continue;
         }
         // link includes loc url (bad catalogers!)
         if (stristr($strUrl, "catdir") || $resource_type == 2) {
             array_push($this->links, array(null, $link->subfield("u")->__toString(), "none"));
         } else {
             $strLinkFormat = "online";
             if (stristr($strDisplay, "PDF") || stristr($strUrl, "PDF") || stristr($strLinkFormatType, "PDF") || stristr($strLinkText, "PDF")) {
                 $strLinkFormat = "pdf";
             } elseif (stristr($strDisplay, "HTML") || stristr($strLinkFormatType, "HTML") || stristr($strLinkText, "HTML")) {
                 $strLinkFormat = "html";
             }
             array_push($this->links, array($strDisplay, $strUrl, $strLinkFormat));
         }
     }
     ### oclc number
     // oclc number can be either in the 001 or in the 035$a
     // make sure 003 says 001 is oclc number or 001 includes an oclc prefix,
     $str001 = $this->controlfield("001")->__toString();
     $str003 = $this->controlfield("003")->__toString();
     $str035 = $this->datafield("035")->subfield("a")->__toString();
     if ($str001 != "" && ($str003 == "" && preg_match('/^\\(?([Oo][Cc])/', $str001) || $str003 == "OCoLC")) {
         $this->oclc_number = $str001;
     } elseif (strpos($str035, "OCoLC") !== false) {
         $this->oclc_number = $str035;
     }
     // get just the number
     $arrOclc = array();
     if (preg_match("/[0-9]{1,}/", $this->oclc_number, $arrOclc) != 0) {
         $strJustOclcNumber = $arrOclc[0];
         // strip out leading 0s
         $strJustOclcNumber = preg_replace("/^0{1,8}/", "", $strJustOclcNumber);
         $this->oclc_number = $strJustOclcNumber;
     }
     ### summary
     // abstract
     foreach ($arrAbstract as $strAbstract) {
         $this->abstract .= " " . $strAbstract;
     }
     $this->abstract = trim(strip_tags($this->abstract));
     // summary
     if ($this->abstract != "") {
         $this->summary = $this->abstract;
         $this->summary_type = "abstract";
     } elseif ($this->toc != "") {
         $this->summary = $this->toc;
         $this->summary_type = "toc";
     } elseif (count($this->subjects) > 0) {
         $this->summary_type = "subjects";
         for ($x = 0; $x < count($this->subjects); $x++) {
             $subject_object = $this->subjects[$x];
             $this->summary .= $subject_object->value;
             if ($x < count($this->subjects) - 1) {
                 $this->summary .= "; ";
             }
         }
     }
     ### journal title
     // we'll take the journal title from the 773$t as the best option,
     if ($this->journal_title == "") {
         // otherwise see if context object has one
         if ($objJTitle != null) {
             $this->journal_title = $objJTitle->nodeValue;
         } elseif ($objTitle != null) {
             $this->journal_title = $objTitle->nodeValue;
         } elseif ($this->short_title != "" && ($this->format == "Article" || $this->format == "Journal" || $this->format == "Newspaper")) {
             $this->journal_title = $this->short_title;
         }
     }
     ### volume
     if ($this->volume == "") {
         if (array_key_exists("volume", $arrRegExJournal)) {
             $this->volume = $arrRegExJournal["volume"];
         }
     }
     ### issue
     if ($this->issue == "") {
         if (array_key_exists("issue", $arrRegExJournal)) {
             $this->issue = $arrRegExJournal["issue"];
         }
     }
     ### pages
     // start page
     if ($this->start_page == "") {
         if (array_key_exists("spage", $arrRegExJournal)) {
             $this->start_page = $arrRegExJournal["spage"];
         }
     }
     // end page
     if ($this->end_page == "") {
         if (array_key_exists("epage", $arrRegExJournal)) {
             // found an end page from our generic regular expression parser
             $this->end_page = $arrRegExJournal["epage"];
         } elseif ($strExtentHost != "" && $this->start_page != "") {
             // there is an extent note, indicating the number of pages,
             // calculate end page based on that
             $arrExtent = array();
             if (preg_match('/([0-9]{1})\\/([0-9]{1})/', $strExtentHost, $arrExtent) != 0) {
                 // if extent expressed as a fraction of a page, just take
                 // the start page as the end page
                 $this->end_page = $this->start_page;
             } elseif (preg_match("/[0-9]{1,}/", $strExtentHost, $arrExtent) != 0) {
                 // otherwise take whole number
                 $iStart = (int) $this->start_page;
                 $iEnd = (int) $arrExtent[0];
                 $this->end_page = $iStart + ($iEnd - 1);
             }
         }
     }
     // page normalization
     if ($this->end_page != "" && $this->start_page != "") {
         // pages were input as 197-8 or 197-82, or similar, so convert
         // the last number to the actual page number
         if (strlen($this->end_page) < strlen($this->start_page)) {
             $strMissing = substr($this->start_page, 0, strlen($this->start_page) - strlen($this->end_page));
             $this->end_page = $strMissing . $this->end_page;
         }
     }
     ### isbn
     // get just the isbn minus format notes
     for ($x = 0; $x < count($this->isbns); $x++) {
         $arrIsbnExtract = array();
         $this->isbns[$x] = str_replace("-", "", $this->isbns[$x]);
         if (preg_match("/[0-9]{12,13}X{0,1}/", $this->isbns[$x], $arrIsbnExtract) != 0) {
             $this->isbns[$x] = $arrIsbnExtract[0];
         } elseif (preg_match("/[0-9]{9,10}X{0,1}/", $this->isbns[$x], $arrIsbnExtract) != 0) {
             $this->isbns[$x] = $arrIsbnExtract[0];
         }
     }
     ### thesis
     // most 502 fields follow the following pattern, which we will use to
     // match and extract individual elements:
     // Thesis (M.F.A.)--University of California, San Diego, 2005
     // Thesis (Ph. D.)--Queen's University, Kingston, Ont., 1977.
     if ($strThesis != "") {
         // extract degree conferred
         $arrDegree = array();
         if (preg_match('/\\(([^\\(]*)\\)/', $strThesis, $arrDegree) != 0) {
             $this->degree = $arrDegree[1];
         }
         // extract institution
         $iInstPos = strpos($strThesis, "--");
         if ($iInstPos !== false) {
             $strInstitution = "";
             // get everything after the --
             $strInstitution = substr($strThesis, $iInstPos + 2, strlen($strThesis) - 1);
             // find last comma in remaining text
             $iEndPosition = strrpos($strInstitution, ",");
             if ($iEndPosition !== false) {
                 $strInstitution = substr($strInstitution, 0, $iEndPosition);
             }
             $this->institution = $strInstitution;
         }
         // extract year conferred
         $this->year = $this->extractYear($strThesis);
     }
     ### title
     $this->non_sort = strip_tags($this->non_sort);
     $this->title = strip_tags($this->title);
     $this->sub_title = strip_tags($this->sub_title);
     // make sure subtitle is properly parsed out
     $iColon = strpos($this->title, ":");
     if ($this->sub_title == "" && $iColon !== false) {
         $this->sub_title = trim(substr($this->title, $iColon + 1));
         $this->title = trim(substr($this->title, 0, $iColon));
     }
     // make sure nonSort portion of the title is extracted
     // punctuation; we'll also *add* the definite/indefinite article below should
     // the quote be followed by one of those -- this is all in english, yo!
     if (strlen($this->title) > 0) {
         if (substr($this->title, 0, 1) == "\"" || substr($this->title, 0, 1) == "'") {
             $this->non_sort = substr($this->title, 0, 1);
             $this->title = substr($this->title, 1);
         }
     }
     // common definite and indefinite articles
     if (strlen($this->title) > 4) {
         if (Xerxes_Framework_Parser::strtolower(substr($this->title, 0, 4)) == "the ") {
             $this->non_sort .= substr($this->title, 0, 4);
             $this->title = substr($this->title, 4);
         } elseif (Xerxes_Framework_Parser::strtolower(substr($this->title, 0, 2)) == "a ") {
             $this->non_sort .= substr($this->title, 0, 2);
             $this->title = substr($this->title, 2);
         } elseif (Xerxes_Framework_Parser::strtolower(substr($this->title, 0, 3)) == "an ") {
             $this->non_sort .= substr($this->title, 0, 3);
             $this->title = substr($this->title, 3);
         }
     }
     ### year
     if ($strDate != "") {
         $this->year = $this->extractYear($strDate);
     } elseif ($this->extractYear($this->publisher)) {
         // off chance that the date is hanging out in the publisher field;
         // might as well strip it out here as well
         $this->year = $this->extractYear($this->publisher);
         $this->publisher = str_replace($this->year, "", $this->publisher);
     } elseif ($this->extractYear($this->journal)) {
         // perhaps somewhere in the 773$g
         $this->year = $this->extractYear($this->journal);
     }
     // last chance grab from context object
     if ($this->year == "" && $objDate != null) {
         $this->year = $this->extractYear($objDate->nodeValue);
     }
     #### authors
     // authors
     $this->author_from_title = $this->datafield("245")->subfield("c")->__toString();
     $objConfName = $this->datafield("111");
     // "anc"
     $objAddAuthor = $this->datafield("700");
     // "a"
     $objAddCorp = $this->datafield("710");
     //, "ab"
     $objAddConf = $this->datafield("711");
     // "acn"
     // conference and corporate names from title ?
     $objConferenceTitle = $this->datafield("811");
     // all
     if ($objAddConf->length() == 0 && $objConferenceTitle->length() > 0) {
         $objAddConf = $objConferenceTitle;
     }
     $objCorporateTitle = $this->datafield("810");
     // all
     if ($objAddCorp->length() == 0 && $objCorporateTitle->length() > 0) {
         $objAddCorp = $objCorporateTitle;
     }
     if ($objConfName->length() > 0 || $objAddConf->length() > 0) {
         array_push($this->format_array, "conference paper");
     }
     // personal primary author
     if ($this->datafield("100")->length() > 0) {
         $objXerxesAuthor = $this->splitAuthor($this->datafield("100"), "a", "personal");
         array_push($this->authors, $objXerxesAuthor);
     } elseif ($objAddAuthor->length() > 0) {
         // editor
         $objXerxesAuthor = $this->splitAuthor($objAddAuthor->item(0), "a", "personal", true);
         array_push($this->authors, $objXerxesAuthor);
         $this->editor = true;
     }
     // additional personal authors
     if ($objAddAuthor->length() > 0) {
         // if there is an editor it has already been included in the array
         // so we need to skip the first author in the list
         if ($this->editor == true) {
             $objAddAuthor->next();
         }
         foreach ($objAddAuthor as $obj700) {
             $objXerxesAuthor = $this->splitAuthor($obj700, "a", "personal", true);
             array_push($this->authors, $objXerxesAuthor);
         }
     }
     // corporate author
     if ($this->datafield("110")->subfield("ab")->__toString() != "") {
         $objXerxesAuthor = $this->splitAuthor($this->datafield("110"), "ab", "corporate");
         array_push($this->authors, $objXerxesAuthor);
     }
     // additional corporate authors
     if ($objAddCorp->length() > 0) {
         foreach ($objAddCorp as $objCorp) {
             $objXerxesAuthor = $this->splitAuthor($objCorp, "ab", "corporate", true);
             array_push($this->authors, $objXerxesAuthor);
         }
     }
     // conference name
     if ($objConfName->length() > 0) {
         $objXerxesAuthor = $this->splitAuthor($objConfName, "anc", "conference");
         array_push($this->authors, $objXerxesAuthor);
     }
     // additional conference names
     if ($objAddConf->length() > 0) {
         foreach ($objAddConf as $objConf) {
             $objXerxesAuthor = $this->splitAuthor($objConf, "acn", "conference", true);
             array_push($this->authors, $objXerxesAuthor);
         }
     }
     // last-chance from context-object
     if (count($this->authors) == 0 && $objAuthors != null) {
         foreach ($objAuthors as $objAuthor) {
             $objXerxesAuthor = new Xerxes_Record_Author();
             foreach ($objAuthor->childNodes as $objAuthAttr) {
                 switch ($objAuthAttr->localName) {
                     case "aulast":
                         $objXerxesAuthor->last_name = $objAuthAttr->nodeValue;
                         $objXerxesAuthor->type = "personal";
                         break;
                     case "aufirst":
                         $objXerxesAuthor->first_name = $objAuthAttr->nodeValue;
                         break;
                     case "auinit":
                         $objXerxesAuthor->init = $objAuthAttr->nodeValue;
                         break;
                     case "aucorp":
                         $objXerxesAuthor->name = $objAuthAttr->nodeValue;
                         $objXerxesAuthor->type = "corporate";
                         break;
                 }
             }
             array_push($this->authors, $objXerxesAuthor);
         }
     }
     // construct a readable journal field if none supplied
     if ($this->journal == "") {
         if ($this->journal_title != "") {
             $this->journal = $this->toTitleCase($this->journal_title);
             if ($this->volume != "") {
                 $this->journal .= " vol. " . $this->volume;
             }
             if ($this->issue != "") {
                 $this->journal .= " iss. " . $this->issue;
             }
             if ($this->year != "") {
                 $this->journal .= " (" . $this->year . ")";
             }
         }
     }
     ## de-duping
     // make sure no dupes in author array
     $author_original = $this->authors;
     $author_other = $this->authors;
     for ($x = 0; $x < count($author_original); $x++) {
         $objXerxesAuthor = $author_original[$x];
         if ($objXerxesAuthor instanceof Xerxes_Record_Author) {
             $this_author = $objXerxesAuthor->allFields();
             for ($a = 0; $a < count($author_other); $a++) {
                 if ($a != $x) {
                     $objThatAuthor = $author_other[$a];
                     if ($objThatAuthor instanceof Xerxes_Record_Author) {
                         $that_author = $objThatAuthor->allFields();
                         if ($this_author == $that_author) {
                             // remove the dupe
                             $author_original[$a] = null;
                         }
                     }
                 }
             }
         }
     }
     $this->authors = array();
     // reset author array
     foreach ($author_original as $author) {
         if ($author instanceof Xerxes_Record_Author) {
             array_push($this->authors, $author);
         }
     }
     // make sure no dupes and no blanks in standard numbers
     $arrISSN = $this->issns;
     $arrISBN = $this->isbns;
     $this->issns = array();
     $this->isbns = array();
     foreach ($arrISSN as $strISSN) {
         $strISSN = trim($strISSN);
         if ($strISSN != "") {
             $strISSN = str_replace("-", "", $strISSN);
             //extract the issn number leaving behind extra chars and comments
             $match = array();
             if (preg_match("/[0-9]{8,8}/", $strISSN, $match)) {
                 $strISSN = $match[0];
             }
             array_push($this->issns, $strISSN);
         }
     }
     foreach ($arrISBN as $strISBN) {
         $strISBN = trim($strISBN);
         if ($strISBN != "") {
             $strISBN = str_replace("-", "", $strISBN);
             array_push($this->isbns, $strISBN);
         }
     }
     $this->issns = array_unique($this->issns);
     $this->isbns = array_unique($this->isbns);
     ### punctuation clean-up
     $this->book_title = $this->stripEndPunctuation($this->book_title, "./;,:");
     $this->title = $this->stripEndPunctuation($this->title, "./;,:");
     $this->sub_title = $this->stripEndPunctuation($this->sub_title, "./;,:");
     $this->short_title = $this->stripEndPunctuation($this->short_title, "./;,:");
     $this->journal_title = $this->stripEndPunctuation($this->journal_title, "./;,:");
     $this->series_title = $this->stripEndPunctuation($this->series_title, "./;,:");
     $this->technology = $this->stripEndPunctuation($this->technology, "./;,:");
     $this->place = $this->stripEndPunctuation($this->place, "./;,:");
     $this->publisher = $this->stripEndPunctuation($this->publisher, "./;,:");
     $this->edition = $this->stripEndPunctuation($this->edition, "./;,:");
     for ($x = 0; $x < count($this->authors); $x++) {
         foreach ($this->authors[$x] as $key => $value) {
             $objXerxesAuthor = $this->authors[$x];
             foreach ($objXerxesAuthor as $key => $value) {
                 $objXerxesAuthor->{$key} = $this->stripEndPunctuation($value, "./;,:");
             }
             $this->authors[$x] = $objXerxesAuthor;
         }
     }
     for ($s = 0; $s < count($this->subjects); $s++) {
         $subject_object = $this->subjects[$s];
         $subject_object->value = $this->stripEndPunctuation($subject_object->value, "./;,:");
         $this->subjects[$s] = $subject_object;
     }
 }
Esempio n. 4
0
 /**
  * Builds the <navbar> XML document and adds it to the request.
  *
  * The document contains link elements for saved records, saved
  * collections, login, logout and the alphabetical database list, plus a
  * <languages> element with one <language> child per configured language
  * when the LANGUAGES config entry is set.
  *
  * @return int always 1
  */
 public function doExecute()
 {
     $objXml = new DOMDocument();
     $objXml->loadXML("<navbar />");
     // the current request uri is used as the return target by several links
     $strReturn = $this->request->getServer("REQUEST_URI");
     ### saved records link
     $arrLink = array("base" => "folder");
     // make sure there is no return if coming from login to prevent a spider
     // from thinking this is a different page
     if ($this->request->getProperty("base") != "authenticate") {
         $arrLink["return"] = $strReturn;
     }
     $savedRecordsLink = $this->addNavbarElement($objXml, "saved_records", $arrLink);
     // add numSavedRecords and sessionSavedRecords for proper icon display
     $objData = new Xerxes_DataMap();
     $num = $objData->totalRecords($this->request->getSession("username"));
     $savedRecordsLink->setAttribute("numSavedRecords", (string) $num);
     $savedRecordsLink->setAttribute("numSessionSavedRecords", (string) Xerxes_Helper::numMarkedSaved());
     ### my collections (i.e., databases)
     $arrCollectionUrl = array("base" => "collections", "action" => "list");
     if (Xerxes_Framework_Restrict::isAuthenticatedUser($this->request)) {
         $arrCollectionUrl["username"] = $this->request->getSession("username");
     }
     $this->addNavbarElement($objXml, "saved_collections", $arrCollectionUrl);
     ### authentication
     // tell it to force an https url if so configured.
     $force_secure_login = $this->registry->getConfig("secure_login", false) == "true";
     // login
     $this->addNavbarElement($objXml, "login", array("base" => "authenticate", "action" => "login", "return" => $strReturn), $force_secure_login);
     // logout
     $this->addNavbarElement($objXml, "logout", array("base" => "authenticate", "action" => "logout", "return" => $strReturn));
     ### db alphabetical list
     $this->addNavbarElement($objXml, "database_list", array("base" => "databases", "action" => "alphabetical"));
     ### languages
     $languages = $this->registry->getConfig("LANGUAGES", false);
     if ($languages != null) {
         // map language codes to locales
         $locales = array();
         foreach ($languages as $language) {
             // reset per language so a missing attribute never inherits the
             // value of the previous entry; an absent or empty locale is 'C'
             $code = "";
             $locale = 'C';
             foreach ($language->attributes() as $name => $val) {
                 if ($name == "code") {
                     $code = (string) $val;
                 }
                 if ($name == "locale") {
                     $locale = (string) $val;
                     if ($locale == '') {
                         $locale = 'C';
                     }
                 }
             }
             $locales[$code] = $locale;
         }
         $languages_xml = $objXml->createElement("languages");
         $objXml->documentElement->appendChild($languages_xml);
         $language_names = Xerxes_Framework_Languages::getInstance();
         foreach ($languages->language as $language) {
             $code = (string) $language["code"];
             // guard the lookup in case this code was not seen in the pass above
             $locale = isset($locales[$code]) ? $locales[$code] : 'C';
             $readable_name = $language_names->getNameFromCode("iso_639_2B_code", $code, $locale);
             $native_name = $language_names->getNameFromCode("iso_639_2B_code", $code);
             $language_node = $objXml->createElement("language");
             $languages_xml->appendChild($language_node);
             $language_node->setAttribute("code", $code);
             $language_node->setAttribute("name", $readable_name);
             $language_node->setAttribute("native_name", $native_name);
             $language_node->setAttribute("locale", $locale);
             // start from the parameters of the current page; fetched anew for
             // each language because the array is modified below
             $current_params = $this->request->getURIProperties();
             // this is necessary on the home page
             if (!array_key_exists("base", $current_params)) {
                 $current_params["base"] = "";
             }
             // subject pages can't support a swap, so send user back to home page
             if (($current_params["base"] == "databases" || $current_params["base"] == "embed") && array_key_exists("subject", $current_params)) {
                 $current_params = array();
                 $current_params["base"] = "";
             }
             // link to the same page with this language selected
             $current_params["lang"] = $code;
             $url = $this->request->url_for($current_params);
             $language_node->setAttribute("url", $url);
         }
     }
     $this->request->addDocument($objXml);
     return 1;
 }