예제 #1
0
 /**
  * Reduce a string to a normalized, hyphen-separated form
  *
  * The subject is transliterated from UTF-8 to ASCII and lower-cased; ampersands
  * and apostrophes are dropped, plus signs, spaces and every remaining non-word
  * character become hyphens, and runs of hyphens are collapsed into one.
  *
  * @param string $strSubject  original string
  * @return string             normalized string
  */
 public static function normalize($strSubject)
 {
     // the transliteration is influenced by the setlocale() call with category LC_CTYPE; see PopulateDatabases.php
     $strAscii = iconv('UTF-8', 'ASCII//TRANSLIT', $strSubject);
     $strResult = Xerxes_Framework_Parser::strtolower($strAscii);
     // characters that are removed outright
     $strResult = str_replace(array("&", "'"), "", $strResult);
     // characters that act as word separators
     $strResult = str_replace(array("+", " "), "-", $strResult);
     // anything else that is not a word character becomes a separator too
     $strResult = Xerxes_Framework_Parser::preg_replace('/\\W/', "-", $strResult);
     // squeeze runs of separators down to a single hyphen
     while (strpos($strResult, "--") !== false) {
         $strResult = str_replace("--", "-", $strResult);
     }
     return $strResult;
 }
예제 #2
0
 /**
  * Rebuild the SFX full-text holdings table from the institutional holdings export
  *
  * Clears the existing full-text rows, then requests the numbered holdings
  * files (...-.prt01.xml, ...-.prt02.xml, and so on) one after another until a
  * request or XML parse fails, adding one Xerxes_Data_Fulltext row per coverage
  * element. All database work happens between beginTransaction() and commit().
  *
  * NOTE(review): no rollback is issued here when an exception is thrown after
  * beginTransaction(); whether Xerxes_DataMap cleans up on its own is not
  * visible from this method -- confirm.
  *
  * @return int          always 1
  * @throws Exception    when no link resolver address is configured, when the
  *                      first holdings file cannot be fetched or parsed, or
  *                      when a fetched file yields no matching items
  */
 public function doExecute()
 {
     // raise the memory limit for the harvest; "500M" is passed as the fallback value
     $configMemory = $this->registry->getConfig("HARVEST_MEMORY_LIMIT", false, "500M");
     ini_set("memory_limit", $configMemory);
     echo "\n\nSFX INSTITUTIONAL HOLDINGS POPULATION \n\n";
     // You can define the export file on sfx as having an instance extension, so
     // give the client the opportunity to define that here
     $strInstance = $this->request->getProperty("instance");
     if ($strInstance != "") {
         $strInstance = "-" . $strInstance;
     }
     // construct the address to Google Scholar institutional
     // holdings file on SFX. Either SFX specific config, or
     // general link resolver config.
     $configSfx = $this->registry->getConfig("ALTERNATE_FULLTEXT_HARVEST_ADDRESS", false, $this->registry->getConfig("LINK_RESOLVER_ADDRESS", false));
     if (!$configSfx) {
         throw new Exception("Can not run populate action, no link resolver address configured. " . "Need config ALTERNATE_FULLTEXT_HARVEST_ADDRESS or LINK_RESOLVER_ADDRESS.");
     }
     // fire-up a transaction with the database
     $objData = new Xerxes_DataMap();
     $objData->beginTransaction();
     // clear old data
     echo "  Flushing SFX fulltext table . . . ";
     $objData->clearFullText();
     echo "done.\n";
     // try to get the data from sfx; the export is split over several files,
     // numbered from 01 upwards, and the number of files is not known up front
     $done = false;
     $x = 0;
     while ($done == false) {
         $x++;
         // file number is zero-padded to two digits
         $strUrl = $configSfx . "/cgi/public/get_file.cgi?file=institutional_holding" . $strInstance . '-.prt' . str_pad($x, 2, '0', STR_PAD_LEFT) . ".xml";
         echo "  Pulling down SFX inst holding file ({$x}) . . . ";
         try {
             $strResponse = Xerxes_Framework_Parser::request($strUrl);
             $objXml = new SimpleXMLElement($strResponse);
         } catch (Exception $e) {
             // a failure on the very first file is a real error; a failure on
             // any later file is taken to mean there are no more files
             if ($x == 1) {
                 throw new Exception("cannot get institutional holding file from sfx: '{$strUrl}'. " . "If this is the correct SFX server address, make sure your SFX allows access to " . "institutional holding file from this IP address in config/get_file_restriction.config " . "on SFX server.");
             }
             $done = true;
         }
         // NOTE(review): this is printed even when the request above failed
         // and ended the loop
         echo "done.\n";
         if (!$done) {
             echo "  Processing file . . . ";
             // every item except those of type 'other'
             $objItems = $objXml->xpath("//item[@type != 'other']");
             // loose comparison: true for an xpath error (false) and for an empty result array
             if ($objItems == false) {
                 throw new Exception("could not find items in inst holding file.");
             }
             echo "done.\n";
             echo "  Adding to database . . . ";
             foreach ($objItems as $objItem) {
                 // one database row per coverage element of the item
                 foreach ($objItem->coverage as $objCoverage) {
                     $objFullText = new Xerxes_Data_Fulltext();
                     // issn is stored without hyphens
                     $objFullText->issn = (string) $objItem->issn;
                     $objFullText->issn = str_replace("-", "", $objFullText->issn);
                     // title is stored url-encoded, lower-cased and cut to 100 characters
                     $objFullText->title = (string) $objItem->title;
                     $objFullText->title = urlencode($objFullText->title);
                     $objFullText->title = substr(Xerxes_Framework_Parser::strtolower($objFullText->title), 0, 100);
                     $objFullText->startdate = (int) $objCoverage->from->year;
                     $objFullText->enddate = (int) $objCoverage->to->year;
                     // a missing end year casts to 0; 9999 is stored in its place
                     if ($objFullText->enddate == 0) {
                         $objFullText->enddate = 9999;
                     }
                     $objFullText->embargo = (int) $objCoverage->embargo->days_not_available;
                     $objFullText->updated = date("YmdHis");
                     // add it to the database
                     $objData->addFulltext($objFullText);
                 }
             }
             echo "done.\n";
         }
     }
     echo "  Commiting changes . . . ";
     $objData->commit();
     echo "done.\n";
     return 1;
 }
예제 #3
0
 /**
  * Store a configuration value from code instead of reading it from the file
  *
  * The value is kept under the upper-cased key; when $bolPass is set it is
  * also recorded under the lower-cased key among the entries passed to XML.
  *
  * @param string $key     configuration setting name
  * @param mixed $value    value. Generally String or SimpleXMLElement.
  * @param bool $bolPass   [optional] whether value should be passed to XML (default false)
  */
 public function setConfig($key, $value, $bolPass = false)
 {
     $strConfigKey = Xerxes_Framework_Parser::strtoupper($key);
     $this->arrConfig[$strConfigKey] = $value;
     if (!$bolPass) {
         return;
     }
     $strPassKey = Xerxes_Framework_Parser::strtolower($key);
     $this->arrPass[$strPassKey] = $value;
 }
예제 #4
0
 /**
  * Apply the formatting attributes of a style node to a piece of text
  *
  * Handles, in this order: text-case conversion, inline styling (wrapped in a
  * <span>), surrounding double quotes, and the prefix / suffix attributes.
  *
  * @param string $data             text to format
  * @param SimpleXMLElement $node   node whose attributes describe the formatting
  *                                 NOTE(review): type inferred from the getName()
  *                                 and array-access usage below -- confirm
  * @return string                  formatted text, including any prefix and suffix
  */
 private function formatting($data, $node)
 {
     // attributes that are copied as-is into an inline CSS declaration
     $arrStyleNames = array("font-family", "font-style", "font-variant", "font-weight", "text-decoration", "vertical-align", "display");
     $arrStyles = array();
     // attributes() has to be called as a method: read as a property,
     // SimpleXML looks for child elements named <attributes> instead and the
     // styling attributes are never seen
     if ($node instanceof SimpleXMLElement) {
         foreach ($node->attributes() as $attribute) {
             $strName = $attribute->getName();
             if (in_array($strName, $arrStyleNames, true)) {
                 $arrStyles[] = $strName . ": " . (string) $attribute;
             }
         }
     }
     // capitalization
     if ($node["text-case"]) {
         switch ((string) $node["text-case"]) {
             case "lowercase":
                 $data = Xerxes_Framework_Parser::strtolower($data);
                 break;
             case "uppercase":
                 $data = Xerxes_Framework_Parser::strtoupper($data);
                 break;
             case "capitalize-first":
             case "sentence":
                 // take the whole first UTF-8 character rather than its first
                 // byte, so a multibyte letter is not cut in half; fall back to
                 // the first byte when the text is empty or not valid UTF-8
                 if (preg_match('/^./us', $data, $arrFirst)) {
                     $strFirst = $arrFirst[0];
                 } else {
                     $strFirst = substr($data, 0, 1);
                 }
                 $data = Xerxes_Framework_Parser::strtoupper($strFirst) . substr($data, strlen($strFirst));
                 break;
             case "capitalize-all":
                 //TODO: add this to parser?
                 break;
             case "title":
                 //TODO: make reference to parser?
                 break;
         }
     }
     // stylistic rendering; declarations are separated by semicolons so that
     // more than one of them still forms valid CSS
     if (count($arrStyles) > 0) {
         $style = implode("; ", $arrStyles);
         $data = "<span style=\"" . $style . "\">" . $data . "</span>";
     }
     // add quotes
     if ($node["quotes"]) {
         $data = "\"" . $data . "\"";
     }
     return $node["prefix"] . $data . $node["suffix"];
 }
예제 #5
0
 /**
  * Get localized language name of provided ISO 639 code
  *
  * @param string $type             attribute of the iso_639_entry element to match
  *                                 against, e.g. iso_639_1_code, iso_639_2B_code or name
  * @param string $code             value to look up; lower-cased before the lookup
  *                                 unless $type is 'name'
  * @param string $override_locale  use this locale instead of Xerxes locale
  * @return mixed                   the name of the first matching entry (passed through
  *                                 dgettext() when gettext is enabled), or NULL when
  *                                 nothing matches
  */
 public function getNameFromCode($type, $code, $override_locale = null)
 {
     if ($type != 'name') {
         $code = Xerxes_Framework_Parser::strtolower($code);
     }
     // XPath 1.0 has no escape sequence for a quote inside a string literal,
     // so a value containing an apostrophe is assembled with concat()
     $code = (string) $code;
     if (strpos($code, "'") === false) {
         $literal = "'" . $code . "'";
     } else {
         $literal = "concat('" . str_replace("'", "',\"'\",'", $code) . "')";
     }
     $elements = $this->xpath->query("//iso_639_entry[@{$type}={$literal}]");
     // DOMXPath::query() signals a malformed expression with false, not null
     if ($elements === false || is_null($elements)) {
         return null;
     }
     // only the first matching entry is used
     foreach ($elements as $element) {
         $name = $element->getAttribute('name');
         if ($this->gettext == false) {
             return $name;
         }
         // translate under the requested locale, then put the previous one back
         $originalLocale = $this->getXerxesLocale();
         if ($override_locale == null) {
             $this->setXerxesLocale($this->locale);
         } else {
             $this->setXerxesLocale($override_locale);
         }
         $languageName = dgettext($this->domain, $name);
         $this->setXerxesLocale($originalLocale);
         return $languageName;
     }
     // no entry matched
     return null;
 }
예제 #6
0
 /**
  * Add global array as xml to request xml document
  *
  * Each key becomes an element (lower-cased, with non-word characters turned
  * into underscores). Scalar values become the element's text; for an array
  * value one element is written per entry, carrying the entry's key in a
  * "key" attribute, and nested arrays are added recursively beneath it.
  *
  * @param DOMDocument $objXml    [by reference] request xml document
  * @param DOMNode $objAppend     [by reference] node to append values to
  * @param array $arrValues       global array
  */
 private function addElement(&$objXml, &$objAppend, $arrValues)
 {
     foreach ($arrValues as $key => $value) {
         // need to make sure the xml element has a valid name
         // and not something crazy with spaces or commas, etc.
         $strSafeKey = Xerxes_Framework_Parser::strtolower(preg_replace('/\\W/', '_', $key));
         // the replacement above leaves digits alone, but an element name may
         // neither be empty nor start with a digit (numeric array keys, for
         // instance) -- createElement() would throw a DOMException, so such
         // names get a leading underscore
         if ((string) $strSafeKey === "" || preg_match('/^[0-9]/', $strSafeKey)) {
             $strSafeKey = "_" . $strSafeKey;
         }
         if (is_array($value)) {
             foreach ($value as $strKey => $strValue) {
                 $objElement = $objXml->createElement($strSafeKey);
                 $objElement->setAttribute("key", $strKey);
                 $objAppend->appendChild($objElement);
                 if (is_array($strValue)) {
                     // multi-dimensional arrays will be recursively added
                     $this->addElement($objXml, $objElement, $strValue);
                 } else {
                     $objElement->nodeValue = Xerxes_Framework_Parser::escapeXml($strValue);
                 }
             }
         } else {
             $objElement = $objXml->createElement($strSafeKey, Xerxes_Framework_Parser::escapeXml($value));
             $objAppend->appendChild($objElement);
         }
     }
 }
예제 #7
0
 /**
  * Save the edited tags of a record and update the tag totals cached in session
  *
  * Reads the new ("tags") and previous ("tagsShaddow") comma-separated tag
  * lists from the request, stores the new list through
  * Xerxes_DataMap::assignTags(), and then adjusts the per-tag counts held in
  * session by the difference between the two lists rather than recounting
  * them from the database.
  *
  * @return int  always 1
  */
 public function doExecute()
 {
     $strUsername = $this->request->getSession("username");
     $iRecord = $this->request->getProperty("record");
     // updated tags
     $strTags = $this->request->getProperty("tags");
     // original tags
     $strShadowTags = $this->request->getProperty("tagsShaddow");
     //  split tags out on comma
     $arrShadow = explode(",", $strShadowTags);
     $arrTags = explode(",", $strTags);
     // trim and lower-case every tag so the two lists can be compared
     foreach ($arrTags as $x => $strTag) {
         $arrTags[$x] = Xerxes_Framework_Parser::strtolower(trim($strTag));
     }
     foreach ($arrShadow as $x => $strTag) {
         $arrShadow[$x] = Xerxes_Framework_Parser::strtolower(trim($strTag));
     }
     // remove any duplicates
     $arrTags = array_unique($arrTags);
     // update the database
     $objData = new Xerxes_DataMap();
     $objData->assignTags($strUsername, $arrTags, $iRecord);
     // now update the cached version without recalculating all the
     // totals with a round-trip to the database
     $arrStored = $this->request->getSession("tags");
     // when no totals are held in session yet, start from an empty list
     // instead of iterating over a non-array
     if (empty($arrStored)) {
         $arrStored = array();
     }
     // see which tags are new and which are actually being deleted or changed
     $arrDelete = array_diff($arrShadow, $arrTags);
     $arrAdded = array_diff($arrTags, $arrShadow);
     // deletes!
     foreach ($arrDelete as $strTag) {
         $strTagUpper = (string) Xerxes_Framework_Parser::strtoupper($strTag);
         foreach ($arrStored as $strStoredKey => $iStoredValue) {
             // compare as strings: a loose == would treat numeric-looking
             // tags such as "007" and "7" as the same tag
             if ($strTagUpper !== (string) Xerxes_Framework_Parser::strtoupper($strStoredKey)) {
                 continue;
             }
             $iStoredValue = (int) $iStoredValue;
             if ($iStoredValue > 1) {
                 // just decrement it
                 $arrStored[$strStoredKey] = $iStoredValue - 1;
             } else {
                 // this was the only entry for the tag so remove it
                 unset($arrStored[$strStoredKey]);
             }
         }
     }
     // adds!
     foreach ($arrAdded as $strTag) {
         if ($strTag == "") {
             continue;
         }
         $strTagUpper = (string) Xerxes_Framework_Parser::strtoupper($strTag);
         $bolExists = false;
         foreach ($arrStored as $strStoredKey => $iStoredValue) {
             if ($strTagUpper === (string) Xerxes_Framework_Parser::strtoupper($strStoredKey)) {
                 // there is one in here already so increment
                 $arrStored[$strStoredKey] = ((int) $iStoredValue) + 1;
                 $bolExists = true;
             }
         }
         // if it wasn't in there already, add it as the first
         if ($bolExists == false) {
             $arrStored[$strTag] = 1;
         }
     }
     // now store it back in session
     $this->setTagsCache($arrStored);
     return 1;
 }
예제 #8
0
 /**
  * Convert a string to sentence case
  *
  * Strings longer than one byte are lower-cased, their first character is
  * upper-cased, and the result is handed to capitalizeSubtitle(); shorter
  * strings are returned unchanged.
  *
  * @param string $strInput  original string
  * @return string           string in sentence case
  */
 public static function toSentenceCase($strInput)
 {
     if (strlen($strInput) > 1) {
         // drop everything
         $strInput = Xerxes_Framework_Parser::strtolower($strInput);
         // capitalize the first letter; take the whole first UTF-8 character
         // rather than its first byte so a multibyte letter is not cut in half,
         // falling back to the first byte when the text is not valid UTF-8
         if (preg_match('/^./us', $strInput, $arrFirst)) {
             $strFirst = $arrFirst[0];
         } else {
             $strFirst = substr($strInput, 0, 1);
         }
         $strInput = Xerxes_Framework_Parser::strtoupper($strFirst) . substr($strInput, strlen($strFirst));
         // and the start of a subtitle
         $strInput = self::capitalizeSubtitle($strInput);
     }
     return $strInput;
 }
예제 #9
0
 /**
  * Convert a string to title case
  *
  * Every word is capitalized except a fixed list of small words (articles,
  * conjunctions, prepositions), which are lower-cased unless they come first.
  * A lower-case letter that starts a subtitle (after ": ") or follows an
  * opening quote is capitalized as well. Input longer than ten bytes with no
  * lower-case ASCII letter at all is lower-cased first.
  *
  * @param string $strInput  original title
  * @return string           title in title case
  */
 protected function toTitleCase($strInput)
 {
     // NOTE: if you make a change to this function, make a corresponding change
     // in the Xerxes_Framework_Parser class, since this one here is a duplicate function
     // allowing Xerxes_Record to be a stand-alone class

     // if there are no lowercase letters (and it is sufficiently long a title to
     // not just be an acronym or something) then this is likely a title
     // entered into a database in ALL CAPS, so drop it entirely to
     // lower-case first
     if (!preg_match("/[a-z]/", $strInput) && strlen($strInput) > 10) {
         $strInput = Xerxes_Framework_Parser::strtolower($strInput);
     }
     // words that shouldn't be capitalized if they aren't the first word
     $arrSmallWords = array('of', 'a', 'the', 'and', 'an', 'or', 'nor', 'but', 'is', 'if', 'then', 'else', 'when', 'at', 'from', 'by', 'on', 'off', 'for', 'in', 'out', 'over', 'to', 'into', 'with', 'as');
     // split the string into separate words
     $arrWords = explode(' ', $strInput);
     foreach ($arrWords as $key => $word) {
         $bolSmall = in_array(Xerxes_Framework_Parser::strtolower($word), $arrSmallWords);
         if ($key == 0 || !$bolSmall) {
             // first word, or not one of our small words: capitalise it
             if (preg_match("/^[^a-zA-Z0-9]/", $word)) {
                 // first character is a quote or something, so capitalise what follows it
                 $arrWords[$key] = substr($word, 0, 1) . ucwords(substr($word, 1));
             } else {
                 $arrWords[$key] = ucwords($word);
             }
         } else {
             $arrWords[$key] = Xerxes_Framework_Parser::strtolower($word);
         }
     }
     // join the words back into a string
     $strFinal = implode(' ', $arrWords);
     // capitalize a lower-case letter that directly follows one of these lead-ins:
     //   ": "  start of a subtitle
     //   "\""  word that starts with a double quote
     //   " '"  word inside the title that starts with a single quote; the space is
     //         required so contractions and possessives are left alone
     //   "^'"  single quote at the very start of the title
     $arrLeadIns = array("/: ([a-z])/", "/\"([a-z])/", "/ '([a-z])/", "/^'([a-z])/");
     foreach ($arrLeadIns as $strPattern) {
         // each match is upper-cased in place at its own offset; replacing every
         // match with the letter captured from the first one would corrupt
         // titles that contain more than one match
         if (preg_match_all($strPattern, $strFinal, $arrMatches, PREG_OFFSET_CAPTURE)) {
             foreach ($arrMatches[1] as $arrMatch) {
                 // $arrMatch is (letter, byte offset); the replacement is also a
                 // single byte, so the remaining offsets stay valid
                 $strFinal = substr_replace($strFinal, ucwords($arrMatch[0]), $arrMatch[1], 1);
             }
         }
     }
     return $strFinal;
 }
예제 #10
0
 /**
  * Converts the query to AND all terms, while preserving boolean operators
  * and quoted phrases; return as array
  *
  * Words are split on single spaces. The lower-case words and / or / not are
  * treated as boolean operators and emitted upper-cased; an "AND" is inserted
  * after every term that is not followed by an operator. Words between double
  * quotes are kept together as one term (with a leading space). Small words
  * found among the plain terms are recorded in $this->arrTips.
  *
  * @param string $strQuery   original query
  * @param bool $toLower      [optional] lower-case the query before parsing (default true)
  * @return array             query normalized
  */
 public function normalizeArray($strQuery, $toLower = true)
 {
     // words recognised as boolean operators
     $arrBoolean = array("and", "or", "not");
     $arrSmallWords = array('of', 'a', 'the', 'and', 'an', 'or', 'nor', 'but', 'is', 'if', 'then', 'else', 'when', 'at', 'from', 'by', 'on', 'off', 'for', 'in', 'out', 'over', 'to', 'into', 'with', 'as');
     // flags the start and end of a quoted phrase
     $bolQuote = false;
     // final array of words
     $arrFinal = array();
     // quoted phrase being collected
     $strQuote = "";
     // small words found in the query
     $arrSmall = array();
     // normalize it
     if ($toLower == true) {
         $strQuery = Xerxes_Framework_Parser::strtolower($strQuery);
     }
     while (strstr($strQuery, "  ")) {
         $strQuery = str_replace("  ", " ", $strQuery);
     }
     // split words into an array
     $arrWords = explode(" ", $strQuery);
     $iWords = count($arrWords);
     // cycle thru each word in the query
     for ($x = 0; $x < $iWords; $x++) {
         $strWord = $arrWords[$x];
         // a term is ANDed with the next one unless the next word is a boolean operator
         $bolNeedsAnd = $x + 1 < $iWords && !in_array($arrWords[$x + 1], $arrBoolean, true);
         $bolHasQuote = strpos($strWord, "\"") !== false;
         if ($bolQuote == true || $bolHasQuote) {
             // inside of, or at the start of, a quoted phrase
             $strQuote .= " " . $strWord;
             if ($bolQuote == true) {
                 // any quote character ends the phrase that is open
                 $bolCloses = $bolHasQuote;
             } else {
                 // the opening word ends the phrase itself only when it also
                 // carries the closing quote, as in a one-word phrase like "cat"
                 $bolCloses = substr_count($strWord, "\"") % 2 == 0;
             }
             if ($bolCloses) {
                 $bolQuote = false;
                 array_push($arrFinal, $strQuote);
                 if ($bolNeedsAnd) {
                     array_push($arrFinal, "AND");
                 }
                 $strQuote = "";
             } else {
                 $bolQuote = true;
             }
         } elseif (in_array($strWord, $arrBoolean, true)) {
             // the current word is a boolean operator
             array_push($arrFinal, Xerxes_Framework_Parser::strtoupper($strWord));
         } else {
             if (in_array($strWord, $arrSmallWords)) {
                 array_push($arrSmall, $strWord);
             }
             array_push($arrFinal, $strWord);
             if ($bolNeedsAnd) {
                 array_push($arrFinal, "AND");
             }
         }
     }
     // a phrase whose closing quote never came is kept as a term rather than
     // dropped, whether or not other terms precede it
     if ($strQuote != "") {
         array_push($arrFinal, $strQuote);
     }
     // small terms
     if (count($arrSmall) > 0) {
         array_push($this->arrTips, array(self::SMALL_WORDS => "'" . implode("', '", $arrSmall) . "'"));
     }
     return $arrFinal;
 }