/**
 * Populate the refereed (peer reviewed) table from the flat file
 * <PATH_PARENT_DIRECTORY>/lib/data/refereed.txt, or, when the "dump"
 * request property is set, write the current table contents to that file.
 *
 * File format: one title per line, three tab-separated columns
 * (issn, title, timestamp). Lines that do not split into exactly three
 * columns are skipped on load.
 *
 * Unless the "no-flush" request property is set, existing refereed data is
 * flushed before the new rows are added. Flush and inserts run inside one
 * transaction on the data map.
 *
 * @return int always 1
 * @throws Exception if the data file cannot be read, or cannot be written in dump mode
 */
public function doExecute()
{
    // memory limit comes from HARVEST_MEMORY_LIMIT; "500M" is passed as the fallback value
    $configMemory = $this->registry->getConfig("HARVEST_MEMORY_LIMIT", false, "500M");
    ini_set("memory_limit", $configMemory);

    echo "\n\nPEER REVIEWED DATA POPULATION \n\n";

    $objData = new Xerxes_DataMap();
    $file = $this->registry->getConfig("PATH_PARENT_DIRECTORY") . "/lib/data/refereed.txt";

    ### dump

    // this is a dump, really only used by david to
    // generate the file to begin with

    if ($this->request->getProperty("dump") != null) {
        echo "Dumping peer reviewed data . . . ";

        $data = $objData->getAllRefereed();
        $output = "";

        foreach ($data as $title) {
            $output .= "\n" . $title->issn . "\t" . $title->title . "\t" . $title->timestamp;
        }

        // file_put_contents() returns false on failure; do not report success in that case
        if (file_put_contents($file, $output) === false) {
            throw new Exception("Could not write refereed data file to {$file}");
        }

        echo "done.\n";

        return 1;
    }

    ### load

    echo "Getting data from refereed file . . . ";

    // read the file before opening the transaction, so a missing file changes nothing
    $file_data = file_get_contents($file);

    if ($file_data === false) {
        throw new Exception("Could not find a refereed data file at {$file}");
    }

    echo "done.\n";

    $objData->beginTransaction();

    if ($this->request->getProperty("no-flush") == null) {
        echo "Flushing old refereed data . . . ";
        $objData->flushRefereed();
        echo "done.\n";
    }

    echo "Adding new refereed data . . . ";

    $titles = explode("\n", $file_data);
    $x = 0; // number of titles added

    foreach ($titles as $title) {
        $arrTitle = explode("\t", $title);

        // skip blank or malformed lines
        if (count($arrTitle) != 3) {
            continue;
        }

        $object = new Xerxes_Data_Refereed();
        $object->issn = trim($arrTitle[0]);
        // NOTE(review): the title column is not loaded; the assignment below was
        // already commented out -- confirm that is intentional
        // $object->title = trim($arrTitle[1]);
        $object->timestamp = trim($arrTitle[2]);

        $x++;

        $objData->addRefereed($object);
    }

    echo "done.\n";
    echo "Added {$x} titles.\n";

    echo "Committing changes . . . ";
    $objData->commit();
    echo "done.\n";

    return 1;
}
/**
 * Rebuild the SFX full-text table from the institutional holdings export
 * files served by the link resolver.
 *
 * Files are requested one after another as
 * <address>/cgi/public/get_file.cgi?file=institutional_holding[-<instance>]-.prtNN.xml
 * with NN starting at 01. The loop stops at the first file that cannot be
 * fetched or parsed; if that is the very first file, the run is aborted.
 *
 * For every <item> whose type attribute is not 'other', one full-text row is
 * added per <coverage> element. The clear and all inserts run inside one
 * transaction on the data map.
 *
 * @return int always 1
 * @throws Exception if no link resolver address is configured, if the first
 *                   holdings file cannot be retrieved, or if a retrieved file
 *                   yields no matching items
 */
public function doExecute()
{
    // memory limit comes from HARVEST_MEMORY_LIMIT; "500M" is passed as the fallback value
    $configMemory = $this->registry->getConfig("HARVEST_MEMORY_LIMIT", false, "500M");
    ini_set("memory_limit", $configMemory);

    echo "\n\nSFX INSTITUTIONAL HOLDINGS POPULATION \n\n";

    // You can define the export file on sfx as having an instance extension, so
    // give the client the opportunity to define that here

    $strInstance = $this->request->getProperty("instance");

    if ($strInstance != "") {
        $strInstance = "-" . $strInstance;
    }

    // construct the address to Google Scholar institutional
    // holdings file on SFX. Either SFX specific config, or
    // general link resolver config.

    $configSfx = $this->registry->getConfig(
        "ALTERNATE_FULLTEXT_HARVEST_ADDRESS",
        false,
        $this->registry->getConfig("LINK_RESOLVER_ADDRESS", false)
    );

    if (!$configSfx) {
        throw new Exception("Can not run populate action, no link resolver address configured. " . "Need config ALTERNATE_FULLTEXT_HARVEST_ADDRESS or LINK_RESOLVER_ADDRESS.");
    }

    // fire-up a transaction with the database

    $objData = new Xerxes_DataMap();
    $objData->beginTransaction();

    // clear old data

    echo " Flushing SFX fulltext table . . . ";
    $objData->clearFullText();
    echo "done.\n";

    // try to get the data from sfx, one numbered part file at a time

    $x = 0;

    while (true) {
        $x++;

        $strUrl = $configSfx . "/cgi/public/get_file.cgi?file=institutional_holding" . $strInstance . '-.prt' . str_pad((string) $x, 2, '0', STR_PAD_LEFT) . ".xml";

        echo " Pulling down SFX inst holding file ({$x}) . . . ";

        try {
            $strResponse = Xerxes_Framework_Parser::request($strUrl);
            $objXml = new SimpleXMLElement($strResponse);
        } catch (Exception $e) {
            // failing on the very first file means nothing could be harvested at all;
            // keep the underlying cause in the message so it can be diagnosed
            if ($x == 1) {
                throw new Exception("cannot get institutional holding file from sfx: '{$strUrl}'. " . "If this is the correct SFX server address, make sure your SFX allows access to " . "institutional holding file from this IP address in config/get_file_restriction.config " . "on SFX server." . " Underlying error: " . $e->getMessage());
            }

            // a failure on a later file is treated as the end of the file series;
            // say so instead of reporting the pull as done
            echo "no further file available.\n";
            break;
        }

        echo "done.\n";

        echo " Processing file . . . ";

        $objItems = $objXml->xpath("//item[@type != 'other']");

        // xpath() gives false on error and an empty array when nothing matches;
        // both are rejected here
        if ($objItems == false) {
            throw new Exception("could not find items in inst holding file.");
        }

        echo "done.\n";

        echo " Adding to database . . . ";

        foreach ($objItems as $objItem) {
            foreach ($objItem->coverage as $objCoverage) {
                $objFullText = new Xerxes_Data_Fulltext();

                // issn is stored without hyphens
                $objFullText->issn = (string) $objItem->issn;
                $objFullText->issn = str_replace("-", "", $objFullText->issn);

                // title is url-encoded, lower-cased, then cut to the first 100 characters
                $objFullText->title = (string) $objItem->title;
                $objFullText->title = urlencode($objFullText->title);
                $objFullText->title = substr(Xerxes_Framework_Parser::strtolower($objFullText->title), 0, 100);

                $objFullText->startdate = (int) $objCoverage->from->year;
                $objFullText->enddate = (int) $objCoverage->to->year;

                // a missing or zero end year is stored as 9999
                // (presumably meaning open-ended coverage -- confirm)
                if ($objFullText->enddate == 0) {
                    $objFullText->enddate = 9999;
                }

                $objFullText->embargo = (int) $objCoverage->embargo->days_not_available;
                $objFullText->updated = date("YmdHis");

                // add it to the database

                $objData->addFulltext($objFullText);
            }
        }

        echo "done.\n";
    }

    echo " Commiting changes . . . ";
    $objData->commit();
    echo "done.\n";

    return 1;
}
/**
 * Pull the Metalib knowledgebase and rebuild the local KB tables.
 *
 * Steps, in order: prune the cache table, then inside one transaction on the
 * data map clear the KB tables, add types, add databases, add categories for
 * each configured language, synchronise user saved databases, and commit.
 *
 * Side effects visible here: sends a text/plain header when not run from the
 * command line, raises the PHP memory limit, sets $this->configInstitute,
 * $this->configPortal, $this->configLanguages, $this->configChunk and
 * $this->objSearch, and temporarily switches LC_CTYPE per language.
 *
 * @return int always 1
 * @throws Exception if the cache cannot be pruned; exceptions raised while
 *                   fetching subjects or adding categories are re-thrown
 *                   after the locale is restored
 */
public function doExecute()
{
    // in case this is being called from the web, plaintext

    if ($this->request->isCommandLine() == false) {
        header("Content-type: text/plain");
    }

    // set a higher than normal memory limit to account for
    // pulling down large knowledgebases

    $configMemory = $this->registry->getConfig("HARVEST_MEMORY_LIMIT", false, "500M");
    ini_set("memory_limit", $configMemory);

    echo "\n\nMETALIB KNOWLEDGEBASE PULL \n\n";

    // get configuration settings

    $this->configInstitute = $this->registry->getConfig("METALIB_INSTITUTE", true);
    $this->configPortal = $this->registry->getConfig("METALIB_PORTAL", false, $this->configInstitute);
    $this->configLanguages = $this->registry->getConfig("LANGUAGES", false);
    $this->configChunk = $this->registry->getConfig("CHUNK_KB_PULL", false, false);

    $configMetalibAddress = $this->registry->getConfig("METALIB_ADDRESS", true);
    $configMetalibUsername = $this->registry->getConfig("METALIB_USERNAME", true);
    $configMetalibPassword = $this->registry->getConfig("METALIB_PASSWORD", true);

    // metalib search object

    $this->objSearch = new Xerxes_MetaSearch($configMetalibAddress, $configMetalibUsername, $configMetalibPassword);

    // data map

    $objData = new Xerxes_DataMap();

    // clear the cache, while we're at it

    echo " Pruning cache table . . . ";

    $status = $objData->pruneCache();

    if ($status != 1) {
        throw new Exception("could not prune cache");
    }

    echo "done\n";

    // now the real kb stuff

    $objData->beginTransaction();

    echo " Flushing KB tables . . . ";
    $objData->clearKB();
    echo "done\n";

    echo " Fetching types . . . ";

    // array of type value objects
    $arrTypes = $this->types();

    foreach ($arrTypes as $objType) {
        $objData->addType($objType);
    }

    echo "done\n";

    echo " Fetching databases . . . ";

    // array of database value objects
    $arrDatabases = $this->databases();

    foreach ($arrDatabases as $objDatabase) {
        $objData->addDatabase($objDatabase);
    }

    echo "done\n";

    echo " Fetching categories and assigning databases . . . ";

    // single default entry, replaced by the LANGUAGES config when one is present
    $languages = array(array("code" => "eng", "locale" => "C"));

    if ($this->configLanguages != null) {
        $languages = $this->configLanguages->language;
    }

    foreach ($languages as $language) {
        $locale = (string) $language["locale"];
        $code = (string) $language["code"];

        // "0" makes setlocale() return the current setting without changing it
        $oldlocale = setlocale(LC_CTYPE, "0");

        setlocale(LC_CTYPE, $locale); // this influences the iconv() call with 'ASCII//TRANSLIT' target

        try {
            // array of category and subcategory value objects
            $arrSubjects = $this->subjects($arrDatabases, $code);

            foreach ($arrSubjects as $objCategory) {
                $objData->addCategory($objCategory);
            }
        } catch (Exception $e) {
            // the locale is process-wide state, so put it back before propagating
            setlocale(LC_CTYPE, $oldlocale);
            throw $e;
        }

        setlocale(LC_CTYPE, $oldlocale);
    }

    echo "done\n";

    echo " Synching user saved databases . . . ";
    $objData->synchUserDatabases();
    echo "done\n";

    echo " Committing changes . . . ";
    $objData->commit();
    echo "done\n";

    return 1;
}