Пример #1
0
/**
 * One-off cleanup of resource 346.
 *
 * Loads the resource XML through ResourceDataObjectElementsSetting, asks the
 * helper to remove data objects whose "mimeType" element has the value
 * "image/x-adobe-dng", and saves the resulting document.
 */
function remove_dataObject()
{
    require_library('ResourceDataObjectElementsSetting');

    $id = 346;
    $source = "http://localhost/eol_php_code/applications/content_server/resources/346.xml.gz";
    $settings = new ResourceDataObjectElementsSetting($id, $source);

    // Load first, then filter; the filtered document is what gets saved.
    $filtered = $settings->remove_data_object_of_certain_element_value(
        "mimeType",
        "image/x-adobe-dng",
        $settings->load_xml_string()
    );
    $settings->save_resource_document($filtered);
}
Пример #2
0
    debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $new_resource_path);
    return;
}
// Write the fetched payload to $new_resource_path and decompress it in place.
fwrite($OUT, $new_resource);
fclose($OUT);
shell_exec("gunzip -f " . $new_resource_path);
// From here on, work with temp/22.xml under DOC_ROOT.
// NOTE(review): presumably this is the file gunzip just produced; the earlier
// value of $new_resource_path is set outside this excerpt - confirm.
$new_resource_path = DOC_ROOT . "temp/22.xml";
$xml = file_get_contents($new_resource_path);
// $xml = str_replace("<dc:description>", "<dc:description><![CDATA[", $xml);
// $xml = str_replace("</dc:description>", "]]></dc:description>", $xml);
// Unwrap attribute-less <a>text</a> elements that contain no nested tags,
// keeping only their text.
$xml = preg_replace("/<a>([^<]+)<\\/a>/", "\\1", $xml);
// Prepend an XML declaration when the document does not contain one.
if (substr_count($xml, "<?xml") == 0) {
    $xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" . $xml;
}
// Overwrite the local 22.xml with the cleaned document; give up (after a
// debug message) if the file cannot be opened for writing.
$old_resource_path = CONTENT_RESOURCE_LOCAL_PATH . "22.xml";
if (!($OUT = fopen($old_resource_path, "w+"))) {
    debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $old_resource_path);
    return;
}
fwrite($OUT, $xml);
fclose($OUT);
// Delete the temporary file.
shell_exec("rm " . $new_resource_path);
// Second pass over the saved file: ask the helper to remove data objects by
// their "mediaURL" value, then save the result.
// NOTE(review): whether the value is matched exactly or as a prefix depends on
// ResourceDataObjectElementsSetting, which is not visible here - confirm.
$resource_id = 22;
require_library('ResourceDataObjectElementsSetting');
$resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
$func = new ResourceDataObjectElementsSetting($resource_id, $resource_path);
$xml_string = file_get_contents($resource_path);
$xml = $func->remove_data_object_of_certain_element_value("mediaURL", "http://animaldiversity.ummz.umich.edu/", $xml_string);
$func->save_resource_document($xml);
// Flag the resource for a forced harvest and gzip its XML.
Functions::set_resource_status_to_force_harvest($resource_id);
Functions::gzip_resource_xml($resource_id);
Пример #3
0
<?php

namespace php_active_record;

/*
accesspoint_url = "http://dumps.wikimedia.org/commonswiki/latest/commonswiki-latest-pages-articles.xml.bz2";
*/
// Wait applied after every web request.
// NOTE(review): the original note said "2 second wait", but 1000000 would be
// 1 second if the consumer treats it as microseconds - confirm the unit.
define('DOWNLOAD_WAIT_TIME', '1000000');
include_once dirname(__FILE__) . "/../../config/environment.php";
// $GLOBALS['ENV_DEBUG'] = false;

// URL prefixes for Wikimedia Commons user pages and regular pages.
define("WIKI_USER_PREFIX", "http://commons.wikimedia.org/wiki/User:");
define("WIKI_PREFIX", "http://commons.wikimedia.org/wiki/");
require_vendor("wikipedia");

$resource_id = 71;
// Stop here when this connector is not allowed to run for the resource.
if (!Functions::can_this_connector_run($resource_id)) {
    return;
}

// Run the Wikimedia harvest for the resource.
$w = new WikimediaHarvester(Resource::find($resource_id));
$w->begin_wikimedia_harvest("update_resources/connectors/files/");
sleep(120);
// delay 2 mins.

// Post-process the generated resource XML: ask the helper to remove data
// objects whose "dataType" element is empty, then save the document.
require_library("ResourceDataObjectElementsSetting");
$resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
$func = new ResourceDataObjectElementsSetting($resource_id, $resource_path);
$xml_string = file_get_contents($resource_path);
$xml = $func->remove_data_object_of_certain_element_value("dataType", "", $xml_string);
$func->save_resource_document($xml);
Functions::set_resource_status_to_force_harvest($resource_id);
Пример #4
0
- If needed ingests TypeInformation text dataObjects
- replaces wrong mimeType value
*/
// Bootstrap the environment and load the helper class used below.
include_once dirname(__FILE__) . "/../../config/environment.php";
require_library('ResourceDataObjectElementsSetting');
$timestart = time_elapsed();

// Birds resource: start from the built-in access point and switch to the one
// stored in the resources table when that one is non-empty and different.
$resource_id = 341;
$resource_path = "http://collections.mnh.si.edu/services/eol/nmnh-birds-response.xml.gz";
$lookup = $GLOBALS['db_connection']->select("SELECT accesspoint_url FROM resources WHERE id={$resource_id}");
$record = $lookup->fetch_row();
$new_resource_path = $record[0];
if ($new_resource_path != '' && $new_resource_path != $resource_path) {
    $resource_path = $new_resource_path;
}
echo "\n processing resource:\n {$resource_path} \n\n";

// Build the document: apply ratings, assign EOL subjects, then normalise the
// WAV mime type and finally drop the data objects carrying that mime type.
$settings = new ResourceDataObjectElementsSetting($resource_id, $resource_path, 'http://purl.org/dc/dcmitype/StillImage', 2);
$xml = $settings->set_data_object_rating_on_xml_document();
require_library('connectors/INBioAPI');
$xml = INBioAPI::assign_eol_subjects($xml);
$xml = $settings->remove_data_object_of_certain_element_value(
    "mimeType",
    "audio/x-wav",
    $settings->replace_data_object_element_value("mimeType", "audio/wav", "audio/x-wav", $xml)
);
$settings->save_resource_document($xml);
Functions::set_resource_status_to_force_harvest($resource_id);

// Report the run time in seconds, minutes and hours.
$elapsed_time_sec = time_elapsed() - $timestart;
$elapsed_minutes = $elapsed_time_sec / 60;
$elapsed_hours = $elapsed_minutes / 60;
echo "\n";
echo "elapsed time = {$elapsed_time_sec} seconds             \n";
echo "elapsed time = " . $elapsed_minutes . " minutes  \n";
echo "elapsed time = " . $elapsed_hours . " hours \n";
echo "\n\n Done processing.";
Пример #5
0
// $row, $resource_id, $resource_path and $timestart are set above this excerpt.
// Use the first column of $row as the resource path when it is non-empty and
// differs from the current one.
$new_resource_path = $row[0];
if ($resource_path != $new_resource_path && $new_resource_path != '') {
    $resource_path = $new_resource_path;
}
echo "\n processing resource:\n {$resource_path} \n\n";
$nmnh = new ResourceDataObjectElementsSetting($resource_id, $resource_path, 'http://purl.org/dc/dcmitype/StillImage', 2);
$xml = $nmnh->set_data_object_rating_on_xml_document();
// Manual fix for DATA-1189, until the partner fixes their data: collapse the
// doubled phrase "Photograph of Photograph of" (case-insensitive).
$xml = str_ireplace("Photograph of Photograph of", "Photograph of", $xml);
// Manual fix for DATA-1205.
// NOTE(review): replace_Indet_sp() is defined outside this excerpt - its exact
// effect cannot be confirmed from here.
$xml = replace_Indet_sp($xml);
// Strip whitespace around tags and drop empty <taxon> elements.
$xml = remove_blank_taxon_entry($xml);
require_library('connectors/INBioAPI');
$xml = INBioAPI::assign_eol_subjects($xml);
// Fix for DATA-1420: ask the helper to remove data objects whose mimeType is
// "image/x-adobe-dng".
$xml = $nmnh->remove_data_object_of_certain_element_value("mimeType", "image/x-adobe-dng", $xml);
$nmnh->save_resource_document($xml);
Functions::set_resource_status_to_force_harvest($resource_id);
// Report the run time in seconds, minutes and hours.
$elapsed_time_sec = time_elapsed() - $timestart;
echo "\n";
echo "elapsed time = {$elapsed_time_sec} seconds             \n";
echo "elapsed time = " . $elapsed_time_sec / 60 . " minutes  \n";
echo "elapsed time = " . $elapsed_time_sec / 60 / 60 . " hours \n";
echo "\n\n Done processing.";
/**
 * Strips whitespace around XML tags and removes empty taxon elements.
 *
 * Whitespace directly before or after any tag is removed first, so a taxon
 * element that held only whitespace becomes "<taxon></taxon>" and is removed
 * too. The taxon tags are matched case-insensitively.
 *
 * @param string $xml XML document as a string.
 * @return string The cleaned document.
 */
function remove_blank_taxon_entry($xml)
{
    $stripped = preg_replace('/\\s*(<[^>]*>)\\s*/', '$1', $xml);
    // preg_replace() returns null when PCRE fails (for example when a limit is
    // hit on a very large document). Passing that null on would turn the whole
    // document into an empty string, so fall back to the unmodified input.
    if ($stripped === null) {
        $stripped = $xml;
    }
    return str_ireplace(array("<taxon></taxon>", "<taxon/>"), "", $stripped);
}