/**
 * Converts a string to a normalized form: lower-cased, transliterated to
 * ASCII, with spaces and non-word characters turned into single hyphens
 *
 * @param string $strSubject  original string (converted from UTF-8)
 * @return string             normalized string
 */
public static function normalize($strSubject)
{
    // transliterate to plain ASCII; this is influenced by the setlocale() call
    // with category LC_CTYPE; see PopulateDatabases.php
    $strNormalized = iconv('UTF-8', 'ASCII//TRANSLIT', $strSubject);

    // iconv() returns false when the input cannot be converted; keep working
    // with the original text instead of normalizing a boolean into ""
    if ($strNormalized === false) {
        $strNormalized = $strSubject;
    }

    $strNormalized = Xerxes_Framework_Parser::strtolower($strNormalized);

    // ampersands and apostrophes are removed outright
    $strNormalized = str_replace(array("&", "'"), "", $strNormalized);

    // plus signs, spaces and any remaining non-word character become hyphens
    $strNormalized = str_replace(array("+", " "), "-", $strNormalized);
    $strNormalized = Xerxes_Framework_Parser::preg_replace('/\\W/', "-", $strNormalized);

    // squeeze runs of hyphens down to one
    while (strpos($strNormalized, "--") !== false) {
        $strNormalized = str_replace("--", "-", $strNormalized);
    }

    return $strNormalized;
}
/**
 * Rebuild the SFX full-text table from the institutional holdings export
 *
 * Fetches the numbered institutional_holding part files from the configured
 * link resolver until one can no longer be fetched or parsed, and adds one
 * full-text row per coverage statement. All database work happens between
 * beginTransaction() and commit() on a single Xerxes_DataMap.
 *
 * @return int  always 1
 * @throws Exception  when no resolver address is configured, when the first
 *                    holdings file cannot be retrieved, or when a retrieved
 *                    file yields no items
 */
public function doExecute()
{
    $configMemory = $this->registry->getConfig("HARVEST_MEMORY_LIMIT", false, "500M");
    ini_set("memory_limit", $configMemory);

    echo "\n\nSFX INSTITUTIONAL HOLDINGS POPULATION \n\n";

    // You can define the export file on sfx as having an instance extension, so
    // give the client the opportunity to define that here
    $strInstance = $this->request->getProperty("instance");

    if ($strInstance != "") {
        $strInstance = "-" . $strInstance;
    }

    // construct the address to Google Scholar institutional
    // holdings file on SFX. Either SFX specific config, or
    // general link resolver config.
    $configSfx = $this->registry->getConfig(
        "ALTERNATE_FULLTEXT_HARVEST_ADDRESS",
        false,
        $this->registry->getConfig("LINK_RESOLVER_ADDRESS", false)
    );

    if (!$configSfx) {
        throw new Exception("Can not run populate action, no link resolver address configured. " .
            "Need config ALTERNATE_FULLTEXT_HARVEST_ADDRESS or LINK_RESOLVER_ADDRESS.");
    }

    // fire-up a transaction with the database
    $objData = new Xerxes_DataMap();
    $objData->beginTransaction();

    // clear old data
    echo " Flushing SFX fulltext table . . . ";
    $objData->clearFullText();
    echo "done.\n";

    // try to get the data from sfx, one numbered part file at a time
    $x = 0;

    while (true) {
        $x++;

        $strUrl = $configSfx . "/cgi/public/get_file.cgi?file=institutional_holding" .
            $strInstance . '-.prt' . str_pad($x, 2, '0', STR_PAD_LEFT) . ".xml";

        echo " Pulling down SFX inst holding file ({$x}) . . . ";

        $objXml = null;

        try {
            $strResponse = Xerxes_Framework_Parser::request($strUrl);
            $objXml = new SimpleXMLElement($strResponse);
        } catch (Exception $e) {
            // failing on the very first part means nothing could be harvested
            // at all; report the underlying error so the cause is not lost
            if ($x == 1) {
                throw new Exception("cannot get institutional holding file from sfx: '{$strUrl}'. " .
                    "If this is the correct SFX server address, make sure your SFX allows access to " .
                    "institutional holding file from this IP address in config/get_file_restriction.config " .
                    "on SFX server. Underlying error: " . $e->getMessage());
            }

            // a failure on a later part ends the harvest
            $objXml = null;
        }

        // printed for the terminating miss as well, as before
        echo "done.\n";

        if ($objXml === null) {
            break;
        }

        echo " Processing file . . . \n";

        $objItems = $objXml->xpath("//item[@type != 'other']");

        if ($objItems == false) {
            throw new Exception("could not find items in inst holding file.");
        }

        echo "done.\n";
        echo " Adding to database . . . ";

        foreach ($objItems as $objItem) {
            // issn and title are the same for every coverage entry of an item:
            // issn without hyphens; title url-encoded, lower-cased and cut to
            // 100 characters
            $strIssn = str_replace("-", "", (string) $objItem->issn);
            $strTitle = urlencode((string) $objItem->title);
            $strTitle = substr(Xerxes_Framework_Parser::strtolower($strTitle), 0, 100);

            foreach ($objItem->coverage as $objCoverage) {
                $objFullText = new Xerxes_Data_Fulltext();

                $objFullText->issn = $strIssn;
                $objFullText->title = $strTitle;
                $objFullText->startdate = (int) $objCoverage->from->year;
                $objFullText->enddate = (int) $objCoverage->to->year;

                // no end year is stored as 9999
                if ($objFullText->enddate == 0) {
                    $objFullText->enddate = 9999;
                }

                $objFullText->embargo = (int) $objCoverage->embargo->days_not_available;
                $objFullText->updated = date("YmdHis");

                // add it to the database
                $objData->addFulltext($objFullText);
            }
        }

        echo "done.\n";
    }

    echo " Commiting changes . . . ";
    $objData->commit();
    echo "done.\n";

    return 1;
}
/**
 * Assign a configuration entry programmatically instead of via the config file
 *
 * The value is always stored under the upper-cased key; when $bolPass is
 * truthy it is additionally stored in the pass-through list under the
 * lower-cased key.
 *
 * @param string $key     configuration setting name
 * @param mixed $value    value. Generally String or SimpleXMLElement.
 * @param bool $bolPass   [optional] whether value should be passed to XML (default false)
 */
public function setConfig($key, $value, $bolPass = false)
{
    $strConfigKey = Xerxes_Framework_Parser::strtoupper($key);
    $this->arrConfig[$strConfigKey] = $value;

    if (!$bolPass) {
        return;
    }

    $strPassKey = Xerxes_Framework_Parser::strtolower($key);
    $this->arrPass[$strPassKey] = $value;
}
/**
 * Apply the rendering attributes of a style node to a piece of text
 *
 * Handles, in order: text-case conversion, inline CSS taken from the node's
 * font/decoration attributes, surrounding quotes, and the prefix / suffix.
 *
 * @param string $data  text to format
 * @param mixed $node   style node; it is accessed the way a SimpleXMLElement
 *                      is (attribute array access, getName()) -- presumably
 *                      that is its type, confirm against callers
 * @return string       formatted text
 */
private function formatting($data, $node)
{
    // attributes that are copied into the inline style
    $arrStyleAttributes = array(
        "font-family", "font-style", "font-variant", "font-weight",
        "text-decoration", "vertical-align", "display"
    );

    $arrDeclarations = array();

    // attributes() iterates the node's attributes; reading ->attributes as a
    // property would select child elements of that name instead
    foreach ($node->attributes() as $attribute) {
        $strName = $attribute->getName();

        if (in_array($strName, $arrStyleAttributes)) {
            $arrDeclarations[] = $strName . ": " . (string) $attribute;
        }
    }

    // declarations must be separated by semicolons to form valid CSS
    $style = implode("; ", $arrDeclarations);

    // capitalization
    if ($node["text-case"]) {
        switch ((string) $node["text-case"]) {
            case "lowercase":
                $data = Xerxes_Framework_Parser::strtolower($data);
                break;

            case "uppercase":
                $data = Xerxes_Framework_Parser::strtoupper($data);
                break;

            case "capitalize-first":
            case "sentence":
                // split off the first character rather than the first byte so
                // a multibyte letter is not cut in half (assumes UTF-8 text)
                if (function_exists("mb_substr")) {
                    $strFirst = mb_substr($data, 0, 1, "UTF-8");
                    $strRest = mb_substr($data, 1, mb_strlen($data, "UTF-8"), "UTF-8");
                } else {
                    $strFirst = substr($data, 0, 1);
                    $strRest = substr($data, 1);
                }

                $data = Xerxes_Framework_Parser::strtoupper($strFirst) . $strRest;
                break;

            case "capitalize-all":
                //TODO: add this to parser?
                break;

            case "title":
                //TODO: make reference to parser?
                break;
        }
    }

    // stylistic rendering
    if ($style != "") {
        $data = "<span style=\"{$style}\">{$data}</span>";
    }

    // add quotes
    if ($node["quotes"]) {
        $data = "\"" . $data . "\"";
    }

    return $node["prefix"] . $data . $node["suffix"];
}
/**
 * Get localized language name of provided ISO 639 code
 *
 * @param string $type  the attribute of the iso_639_entry to match against;
 *                      one of: iso_639_1_code, iso_639_2B_code. The value
 *                      'name' is also recognized, in which case $code is
 *                      matched as given instead of being lower-cased
 * @param string $code  the value to look up (e.g. the language code)
 * @param string $override_locale  use this locale instead of Xerxes locale
 * @return mixed  A string with the localized language name or NULL if the code is not valid
 */
public function getNameFromCode($type, $code, $override_locale = null)
{
    if ($type != 'name') {
        $code = Xerxes_Framework_Parser::strtolower($code);
    }

    // quote the value as an XPath string literal so that an apostrophe or a
    // double quote in it cannot break the expression
    $strCode = (string) $code;

    if (strpos($strCode, "'") === false) {
        $strLiteral = "'" . $strCode . "'";
    } elseif (strpos($strCode, '"') === false) {
        $strLiteral = '"' . $strCode . '"';
    } else {
        $strLiteral = "concat('" . str_replace("'", "', \"'\", '", $strCode) . "')";
    }

    $elements = $this->xpath->query("//iso_639_entry[@{$type}={$strLiteral}]");

    // a failed query gives false (not null); treat both as "not found"
    if ($elements === false || $elements === null) {
        return null;
    }

    // only the first matching entry is used
    foreach ($elements as $element) {
        $name = $element->getAttribute('name');

        if ($this->gettext == false) {
            return $name;
        }

        // translate under the requested locale, then put the previous one back
        $originalLocale = $this->getXerxesLocale();

        if ($override_locale == null) {
            $this->setXerxesLocale($this->locale);
        } else {
            $this->setXerxesLocale($override_locale);
        }

        $languageName = dgettext($this->domain, $name);
        $this->setXerxesLocale($originalLocale);

        return $languageName;
    }

    // no entry matched
    return null;
}
/**
 * Add global array as xml to request xml document
 *
 * Each key becomes an element name (non-word characters replaced by "_",
 * lower-cased). A scalar value becomes the element's text; an array value
 * produces one element per entry carrying the entry's key in a "key"
 * attribute, recursing for nested arrays.
 *
 * @param DOMDocument $objXml   [by reference] request xml document
 * @param DOMNode $objAppend    [by reference] node to append values to
 * @param array $arrValues      global array
 */
private function addElement(&$objXml, &$objAppend, $arrValues)
{
    foreach ($arrValues as $key => $value) {
        // need to make sure the xml element has a valid name
        // and not something crazy with spaces or commas, etc.
        $strSafeKey = (string) Xerxes_Framework_Parser::strtolower(preg_replace('/\\W/', '_', $key));

        // an XML name can be neither empty nor start with a digit (numeric
        // array keys would otherwise make createElement() throw)
        if ($strSafeKey === "" || preg_match('/^[0-9]/', $strSafeKey)) {
            $strSafeKey = "_" . $strSafeKey;
        }

        if (!is_array($value)) {
            $objElement = $objXml->createElement($strSafeKey, Xerxes_Framework_Parser::escapeXml($value));
            $objAppend->appendChild($objElement);
            continue;
        }

        foreach ($value as $strKey => $strValue) {
            $objElement = $objXml->createElement($strSafeKey);
            $objElement->setAttribute("key", $strKey);
            $objAppend->appendChild($objElement);

            if (is_array($strValue)) {
                // multi-dimensional arrays will be recursively added
                $this->addElement($objXml, $objElement, $strValue);
            } else {
                $objElement->nodeValue = Xerxes_Framework_Parser::escapeXml($strValue);
            }
        }
    }
}
/**
 * Save the tags submitted for a record and bring the session tag cache in line
 *
 * The submitted ("tags") and original ("tagsShaddow") comma-separated lists
 * are trimmed and lower-cased, the new set is written through
 * Xerxes_DataMap::assignTags(), and the cached per-tag counts held in session
 * are adjusted from the difference between the two lists.
 *
 * @return int  always 1
 */
public function doExecute()
{
    $strUsername = $this->request->getSession("username");
    $iRecord = $this->request->getProperty("record");
    $strTags = $this->request->getProperty("tags"); // updated tags
    $strShadowTags = $this->request->getProperty("tagsShaddow"); // original tags

    // split tags out on comma
    $arrShadow = explode(",", $strShadowTags);
    $arrTags = explode(",", $strTags);

    foreach ($arrTags as $iIndex => $strTag) {
        $arrTags[$iIndex] = Xerxes_Framework_Parser::strtolower(trim($strTag));
    }

    foreach ($arrShadow as $iIndex => $strTag) {
        $arrShadow[$iIndex] = Xerxes_Framework_Parser::strtolower(trim($strTag));
    }

    // remove any duplicates; the original list too, so that a repeated entry
    // cannot lower a cached count more than once
    $arrTags = array_unique($arrTags);
    $arrShadow = array_unique($arrShadow);

    // update the database
    $objData = new Xerxes_DataMap();
    $objData->assignTags($strUsername, $arrTags, $iRecord);

    // now update the cached version without recalculating all the
    // totals with a round-trip to the database
    $arrStored = $this->request->getSession("tags");

    // nothing cached yet: start from an empty list instead of looping over a
    // non-array
    if (!is_array($arrStored)) {
        $arrStored = array();
    }

    // see which tags are new and which are actually being deleted or changed
    $arrDelete = array_diff($arrShadow, $arrTags);
    $arrAdded = array_diff($arrTags, $arrShadow);

    // deletes!
    foreach ($arrDelete as $strTag) {
        $strTagUpper = (string) Xerxes_Framework_Parser::strtoupper($strTag);

        foreach ($arrStored as $strStoredKey => $iStoredValue) {
            // compare as strings: a loose == treats numeric-looking tags such
            // as "1e3" and "1000" as equal
            if ($strTagUpper !== (string) Xerxes_Framework_Parser::strtoupper($strStoredKey)) {
                continue;
            }

            $iStoredValue = (int) $iStoredValue;

            if ($iStoredValue > 1) {
                // just deincrement it
                $arrStored[$strStoredKey] = $iStoredValue - 1;
            } else {
                // this was the only entry for the tag so remove it
                unset($arrStored[$strStoredKey]);
            }
        }
    }

    // adds!
    foreach ($arrAdded as $strTag) {
        if ($strTag == "") {
            continue;
        }

        $strTagUpper = (string) Xerxes_Framework_Parser::strtoupper($strTag);
        $bolExists = false;

        foreach ($arrStored as $strStoredKey => $iStoredValue) {
            if ($strTagUpper === (string) Xerxes_Framework_Parser::strtoupper($strStoredKey)) {
                // there is one in here already so increment
                $arrStored[$strStoredKey] = ((int) $iStoredValue) + 1;
                $bolExists = true;
            }
        }

        // if it wasn't in there already, add it as the first
        if ($bolExists == false) {
            $arrStored[$strTag] = 1;
        }
    }

    // now store it back in session
    $this->setTagsCache($arrStored);

    return 1;
}
/**
 * Convert a string to sentence case: everything lower-cased, then the first
 * letter and the start of a subtitle capitalized
 *
 * Strings of one byte or less are returned unchanged.
 *
 * @param string $strInput  original string
 * @return string           sentence-cased string
 */
public static function toSentenceCase($strInput)
{
    if (strlen($strInput) > 1) {
        // drop everything
        $strInput = Xerxes_Framework_Parser::strtolower($strInput);

        // capitalize the first letter; split on the first character rather
        // than the first byte so a multibyte letter is not cut in half
        // (assumes UTF-8 text)
        if (function_exists("mb_substr")) {
            $strFirst = mb_substr($strInput, 0, 1, "UTF-8");
            $strRest = mb_substr($strInput, 1, mb_strlen($strInput, "UTF-8"), "UTF-8");
        } else {
            $strFirst = substr($strInput, 0, 1);
            $strRest = substr($strInput, 1);
        }

        $strInput = Xerxes_Framework_Parser::strtoupper($strFirst) . $strRest;

        // and the start of a subtitle
        $strInput = self::capitalizeSubtitle($strInput);
    }

    return $strInput;
}
/**
 * Convert a string to title case
 *
 * Capitalizes every word except a fixed list of small words (which are
 * lower-cased unless they come first), then capitalizes the letter that
 * starts a subtitle or follows an opening quote.
 *
 * @param string $strInput  original title
 * @return string           title-cased string
 */
protected function toTitleCase($strInput)
{
    // NOTE: if you make a change to this function, make a corresponding change
    // in the Xerxes_Framework_Parser class, since this one here is a duplicate function
    // allowing Xerxes_Record to be a stand-alone class

    // if there are no lowercase letters (and its sufficiently long a title to
    // not just be an acronym or something) then this is likely a title
    // entered into a database in ALL CAPS, so drop it entirely to
    // lower-case first
    if (preg_match("/[a-z]/", $strInput) == 0 && strlen($strInput) > 10) {
        $strInput = Xerxes_Framework_Parser::strtolower($strInput);
    }

    // words that shouldn't be capitalized if they aren't the first word
    $arrSmallWords = array('of', 'a', 'the', 'and', 'an', 'or', 'nor', 'but', 'is', 'if',
        'then', 'else', 'when', 'at', 'from', 'by', 'on', 'off', 'for', 'in', 'out',
        'over', 'to', 'into', 'with', 'as');

    // split the string into separate words
    $arrWords = explode(' ', $strInput);

    foreach ($arrWords as $key => $word) {
        $strLower = Xerxes_Framework_Parser::strtolower($word);

        if ($key != 0 && in_array($strLower, $arrSmallWords)) {
            // a small word that is not leading the title stays lower-case
            $arrWords[$key] = $strLower;
        } elseif (preg_match("/^[^a-zA-Z0-9]/", $word)) {
            // first character is a quote or something: capitalize what follows it
            $arrWords[$key] = substr($word, 0, 1) . ucwords(substr($word, 1));
        } else {
            $arrWords[$key] = ucwords($word);
        }
    }

    // join the words back into a string
    $strFinal = implode(' ', $arrWords);

    // letters still lower-case after the word pass that must be capitalized:
    //  - the start of a subtitle
    //  - a word that starts with a double quote
    //  - a word that starts with a single quote; need to be a little more
    //    cautious here and make sure there is a space before the quote when
    //    inside the title to ensure this isn't a quote for a contraction or
    //    for possessive; separate case for when the quote is the first word
    $arrPatterns = array("/: ([a-z])/", "/\"([a-z])/", "/ '([a-z])/", "/^'([a-z])/");

    foreach ($arrPatterns as $strPattern) {
        $arrMatches = array();

        if (!preg_match_all($strPattern, $strFinal, $arrMatches, PREG_OFFSET_CAPTURE)) {
            continue;
        }

        // capitalize every match with its own letter; each entry is
        // array(letter, byte offset), and swapping one byte for one byte
        // leaves the remaining offsets valid
        foreach ($arrMatches[1] as $arrMatch) {
            $strFinal = substr_replace($strFinal, ucwords($arrMatch[0]), $arrMatch[1], 1);
        }
    }

    return $strFinal;
}
/**
 * Converts the query to AND all terms, while preserving boolean operators
 * and quoted phrases; return as array
 *
 * Small words found among the plain terms are recorded as a tip in
 * $this->arrTips under self::SMALL_WORDS.
 *
 * @param string $strQuery  original query
 * @param bool $toLower     [optional] lower-case the query before parsing (default true)
 * @return array            query normalized
 */
public function normalizeArray($strQuery, $toLower = true)
{
    $arrOperators = array("and", "or", "not");
    $arrSmallWords = array('of', 'a', 'the', 'and', 'an', 'or', 'nor', 'but', 'is', 'if',
        'then', 'else', 'when', 'at', 'from', 'by', 'on', 'off', 'for', 'in', 'out',
        'over', 'to', 'into', 'with', 'as');

    $bolQuote = false;   // true while inside a quoted phrase
    $arrFinal = array(); // final array of words
    $arrSmall = array(); // small words encountered
    $strQuote = "";      // quoted phrase being collected

    // normalize it
    if ($toLower == true) {
        $strQuery = Xerxes_Framework_Parser::strtolower($strQuery);
    }

    // collapse runs of spaces into a single space
    $strQuery = preg_replace('/ {2,}/', ' ', $strQuery);

    // split words into an array
    $arrWords = explode(" ", $strQuery);
    $iCount = count($arrWords);

    // cycle thru each word in the query
    for ($x = 0; $x < $iCount; $x++) {
        $strWord = $arrWords[$x];
        $bolHasQuote = (strpos($strWord, "\"") !== false);

        // a term is followed by AND when another word comes after it and that
        // word is not itself a boolean operator
        $bolNeedsAnd = ($x + 1 < $iCount) && !in_array($arrWords[$x + 1], $arrOperators, true);

        if ($bolQuote == true) {
            // we are inside of a quoted phrase
            $strQuote .= " " . $strWord;

            if ($bolHasQuote) {
                // the end of a quoted phrase
                $bolQuote = false;
                array_push($arrFinal, $strQuote);

                if ($bolNeedsAnd) {
                    array_push($arrFinal, "AND");
                }

                $strQuote = "";
            }
        } elseif ($bolHasQuote) {
            // this is the start of a quoted phrase
            $strQuote .= " " . $strWord;

            if (substr_count($strWord, "\"") >= 2) {
                // the phrase opens and closes within this one word, so it is
                // already complete
                array_push($arrFinal, $strQuote);

                if ($bolNeedsAnd) {
                    array_push($arrFinal, "AND");
                }

                $strQuote = "";
            } else {
                $bolQuote = true;
            }
        } elseif (in_array($strWord, $arrOperators, true)) {
            // the current word is a boolean operator
            array_push($arrFinal, Xerxes_Framework_Parser::strtoupper($strWord));
        } else {
            if (in_array($strWord, $arrSmallWords)) {
                array_push($arrSmall, $strWord);
            }

            array_push($arrFinal, $strWord);

            if ($bolNeedsAnd) {
                array_push($arrFinal, "AND");
            }
        }
    }

    // a phrase whose closing quote never arrived: keep what was collected
    // rather than dropping the terms
    if ($strQuote != "") {
        array_push($arrFinal, $strQuote);
    }

    // small terms
    if (count($arrSmall) > 0) {
        array_push($this->arrTips, array(self::SMALL_WORDS => "'" . implode("', '", $arrSmall) . "'"));
    }

    return $arrFinal;
}