/**
 * Bulk-imports CSV and XML files from a folder (searched via dir_tree()) or
 * from a single file.
 *
 * Files whose extension is neither "csv" nor "xml" are skipped. Per-file
 * timing and memory figures can optionally be written to a CSV report.
 *
 * @param array $arguments Task arguments; 'folder' is the folder or file to import
 * @param array $options   Task options read here: 'noindex', 'output' and 'v'
 *
 * @throws sfException If 'folder' is empty or does not exist on disk
 */
protected function execute($arguments = array(), $options = array())
{
  $timer = new QubitTimer(); // overall timing

  // NOTE(review): the returned context is not used in this method; the call
  // is kept because it presumably initialises the application context that
  // the importers rely on -- confirm.
  sfContext::createInstance($this->configuration);

  if (empty($arguments['folder']) || !file_exists($arguments['folder']))
  {
    throw new sfException('You must specify a valid import folder or file');
  }

  $indexingEnabled = empty($options['noindex']);

  // Set indexing preference
  if ($indexingEnabled)
  {
    QubitSearch::getInstance()->getEngine()->enableBatchMode();
  }
  else
  {
    QubitSearch::getInstance()->disabled = true;
  }

  if (is_dir($arguments['folder']))
  {
    // Recurse into the import folder
    $files = $this->dir_tree(rtrim($arguments['folder'], '/'));
  }
  else
  {
    $files = array($arguments['folder']);
  }

  // TODO: Add some colour
  $this->log("Importing " . count($files) . " files from " . $arguments['folder'] . " (indexing is " . ($indexingEnabled ? "ENABLED" : "DISABLED") . ") ...\n");

  $count = 0;
  $total = count($files);

  // Always start with an empty report so that writing it out is safe even
  // when no file was imported
  $rows = array();

  foreach ($files as $file)
  {
    $start = microtime(true);

    // Choose import type based on file extension, eg. csv, xml. The match is
    // case-insensitive so that "RECORD.XML" is not silently skipped.
    $extension = strtolower(pathinfo($file, PATHINFO_EXTENSION));

    if ('csv' === $extension)
    {
      $importer = new QubitCsvImport();
      $importer->import($file, $options);
    }
    elseif ('xml' === $extension)
    {
      $importer = new QubitXmlImport();
      $importer->import($file, array('strictXmlParsing' => false));
    }
    else
    {
      // Move on to the next file
      continue;
    }

    // Try to free up memory
    unset($importer);

    $count++;
    $split = microtime(true) - $start;

    // Store details if output is specified
    if (!empty($options['output']))
    {
      $rows[] = array($count, $split, memory_get_usage());
    }

    if (!empty($options['v']))
    {
      $this->log(basename($file) . " imported (" . round($split, 2) . " s) (" . $count . "/" . $total . ")");
    }
  }

  // Create/open output file if specified
  if (!empty($options['output']))
  {
    $fh = fopen($options['output'], 'w+');

    if (false === $fh)
    {
      // The import itself already succeeded; report the problem instead of
      // failing on an invalid file handle
      $this->log("Unable to open output file " . $options['output'] . " for writing; timing details were not saved.");
    }
    else
    {
      foreach ($rows as $row)
      {
        fputcsv($fh, $row);
      }

      // Summary row: total elapsed time and peak memory
      fputcsv($fh, array('', $timer->elapsed(), memory_get_peak_usage()));
      fclose($fh);
    }
  }

  // Optimize index if enabled
  if ($indexingEnabled)
  {
    QubitSearch::getInstance()->getEngine()->optimize();
  }

  $this->log("\nSuccessfully imported " . $count . " XML/CSV files in " . $timer->elapsed() . " s. " . memory_get_peak_usage() . " bytes used.");
}
/**
 * Executes action
 *
 * Harvests records from the OAI repository linked to the requested harvest:
 * issues ListRecords requests (following resumption tokens), imports every
 * returned record with QubitXmlImport and finally stores the new last
 * harvest date.
 *
 * Sets on the action: repositoryName, noRecordsMatch, recordCount,
 * errorsFound and errorsXML.
 *
 * @param sfRequest $request A request object
 */
public function execute($request)
{
  // Set no records match to false to start with
  $this->noRecordsMatch = false;

  // Keep track of number of records harvested
  $this->recordCount = 0;

  // Containers for xml import errors; initialised once so that errors from
  // every page of a multi-page harvest are kept
  $this->errorsFound = array();
  $this->errorsXML = array();

  // If the request did not go through the proper routing, forward to 404
  if (!isset($request->id))
  {
    $this->forward404();
  }

  $harvestInfo = QubitOaiHarvest::getById($request->id);

  // Guard against an id that does not resolve to a harvest
  if (!$harvestInfo)
  {
    $this->forward404();
  }

  $harvestInfo->setLastHarvestAttempt(QubitOai::getDate());
  $harvestInfo->save();

  $rep = $harvestInfo->getOaiRepository();

  // If repository was not found 404
  if (!$rep)
  {
    $this->forward404();
  }

  $this->repositoryName = $rep->getName();

  $until = gmdate('Y-m-d\\TH:i:s\\Z');

  // Create the base request
  $params = array('verb' => 'ListRecords');

  $lastHarvest = $harvestInfo->getLastHarvest();
  if (null != $lastHarvest)
  {
    $lastHarvestTime = strtotime($lastHarvest);
    if (false !== $lastHarvestTime)
    {
      // The trailing "Z" claims UTC, so format with gmdate() like $until
      $params['from'] = gmdate('Y-m-d\\TH:i:s\\Z', $lastHarvestTime);
    }
  }

  $params['until'] = $until;
  $params['metadataPrefix'] = $harvestInfo->getMetadataPrefix();

  // Add the set parameter if supplied
  if (null != $harvestInfo->getSet())
  {
    $params['set'] = $harvestInfo->getSet();
  }

  // http_build_query() URL-encodes every value; the separator is passed
  // explicitly so the result does not depend on arg_separator.output
  $query = http_build_query($params, '', '&');

  do
  {
    // Load XML through simplexml http
    $oaiSimple = simplexml_load_file($rep->getUri() . '?' . $query);

    // Unreachable repository or malformed response
    if (false === $oaiSimple)
    {
      $this->forward404();
    }

    $oaiSimple->registerXPathNamespace('c', 'http://www.openarchives.org/OAI/2.0/');

    $oaiErrors = $oaiSimple->xpath('//c:error');
    if ($oaiErrors)
    {
      $oaiErrorAttributes = $oaiErrors[0]->attributes();

      if ('noRecordsMatch' === (string) $oaiErrorAttributes['code'])
      {
        $this->noRecordsMatch = true;
      }
      else
      {
        $this->forward404();
      }
    }

    if (!$this->noRecordsMatch)
    {
      // XML declaration prepended to each record so that it validates
      $oaiHeader = '<?xml version="1.0" encoding="UTF-8" ?>';

      // Strip oai header, construct array of records
      $oaiRecords = $oaiSimple->xpath('//c:ListRecords/c:record');
      if (!is_array($oaiRecords))
      {
        $oaiRecords = array();
      }

      foreach ($oaiRecords as $oaiRec)
      {
        $recordXml = $oaiHeader . $oaiRec->asXML() . "\n";

        $importer = new QubitXmlImport();
        $importer->import($recordXml, array('strictXmlParsing' => false));

        if ($importer->hasErrors())
        {
          $this->errorsFound[] = $importer->getErrors();
          $this->errorsXML[] = $recordXml;
        }
      }

      // Increment record count to keep track of number of records harvested
      $this->recordCount += count($oaiRecords);
    }

    // Check for resumption token which is also the loop indicator. An empty
    // token element marks the end of the list, so only a single, non-empty
    // token triggers another request.
    $hasMore = false;
    $tokenNodes = $oaiSimple->xpath('//c:ListRecords/c:resumptionToken');

    if (is_array($tokenNodes) && 1 == count($tokenNodes))
    {
      $resumptionToken = trim((string) $tokenNodes[0]);

      if ('' !== $resumptionToken)
      {
        $hasMore = true;
        $query = http_build_query(array('verb' => 'ListRecords', 'resumptionToken' => $resumptionToken), '', '&');
      }
    }
  }
  while ($hasMore);

  // Update last harvest date
  $harvestInfo->setLastHarvest($until);
  $harvestInfo->save();
}
/**
 * Imports an uploaded file.
 *
 * A "csv" or "xml" upload is imported directly; a "zip" upload (when the
 * ZipArchive class is available) is extracted next to the uploaded temporary
 * file and every CSV/XML file found in it is imported. On failure an error
 * flash message is set and the user is redirected to the file selection
 * page.
 *
 * Sets on the action: timer, errors, rootObject and objectType, the last
 * three taken from the last importer that ran.
 *
 * @param sfRequest $request A request object
 */
public function execute($request)
{
  $this->timer = new QubitTimer();

  $importSelectRoute = array('module' => 'object', 'action' => 'importSelect');

  $file = $request->getFiles('file');

  // if we got here without a file upload, go to file selection
  if (!isset($file))
  {
    $this->redirect($importSelectRoute);
  }

  $indexingEnabled = !$request->getParameter('noindex');

  // set indexing preference
  if ($indexingEnabled)
  {
    QubitSearch::getInstance()->getEngine()->enableBatchMode();
  }
  else
  {
    QubitSearch::getInstance()->disabled = true;
  }

  $csvOptions = array('schema' => $request->getParameter('schema'));
  $xmlOptions = array('strictXmlParsing' => false);

  $importer = null;
  $errorMessage = null;

  // NOTE(review): no redirect is issued inside the try block. In symfony 1.x
  // redirect() is understood to stop the action by throwing an exception
  // derived from sfException, which the catch below would intercept and use
  // to overwrite the flash message -- confirm against the framework version
  // in use.
  try
  {
    // choose import type based on file extension, eg. csv, xml
    // (case-insensitive, so "DATA.XML" is accepted too)
    $extension = strtolower(pathinfo($file['name'], PATHINFO_EXTENSION));

    if ('csv' === $extension)
    {
      $importer = new QubitCsvImport();
      $importer->import($file['tmp_name'], $csvOptions);
    }
    elseif ('xml' === $extension)
    {
      $importer = new QubitXmlImport();
      $importer->import($file['tmp_name'], $xmlOptions);
    }
    elseif ('zip' === $extension && class_exists('ZipArchive'))
    {
      $zipFolder = $file['tmp_name'] . '-zip';
      if (!file_exists($zipFolder))
      {
        mkdir($zipFolder, 0755);
      }

      // extract the zip archive into the temporary folder
      $zip = new ZipArchive();

      // open() returns true on success and an error code otherwise
      if (true !== $zip->open($file['tmp_name']))
      {
        throw new sfException($this->context->i18n->__('Unable to import selected file'));
      }

      $extracted = $zip->extractTo($zipFolder);
      $zip->close();

      if (!$extracted)
      {
        throw new sfException($this->context->i18n->__('Unable to import selected file'));
      }

      // this code is from lib/importBulkTask.class.php
      foreach ($this->dir_tree($zipFolder) as $importFile)
      {
        $importExtension = strtolower(pathinfo($importFile, PATHINFO_EXTENSION));

        if ('csv' !== $importExtension && 'xml' !== $importExtension)
        {
          // move on to the next file, keeping the previous importer so its
          // results remain available after the loop
          continue;
        }

        // try to free up memory before creating the next importer
        $importer = null;

        if ('csv' === $importExtension)
        {
          $importer = new QubitCsvImport();
          $importer->import($importFile, $csvOptions);
        }
        else
        {
          $importer = new QubitXmlImport();
          $importer->import($importFile, $xmlOptions);
        }
      }

      // the archive contained nothing that could be imported
      if (null === $importer)
      {
        throw new sfException($this->context->i18n->__('Unable to import selected file'));
      }
    }
    else
    {
      $errorMessage = $this->context->i18n->__('Unable to import selected file');
    }
  }
  catch (sfException $e)
  {
    $errorMessage = $e->getMessage();
  }

  if (null !== $errorMessage)
  {
    $this->context->user->setFlash('error', $errorMessage);
    $this->redirect($importSelectRoute);
  }

  // optimize index if enabled
  if ($indexingEnabled)
  {
    QubitSearch::getInstance()->getEngine()->optimize();
  }

  $this->errors = $importer->getErrors();
  $this->rootObject = $importer->getRootObject();
  $this->objectType = strtr(get_class($this->rootObject), array('Qubit' => ''));
}