/**
 * One-off clean-up for resource 346.
 *
 * Loads the resource XML through ResourceDataObjectElementsSetting, passes it
 * through remove_data_object_of_certain_element_value() for the element
 * "mimeType" with the value "image/x-adobe-dng", and saves the result back
 * through the same helper object.
 *
 * Takes no arguments and returns nothing; the resource id and its location
 * are fixed inside the function.
 */
function remove_dataObject()
{
    require_library('ResourceDataObjectElementsSetting');

    $id = 346;
    $location = "http://localhost/eol_php_code/applications/content_server/resources/346.xml.gz";
    $settings = new ResourceDataObjectElementsSetting($id, $location);

    // Load first, then filter: the loaded string is the filter's input.
    $filtered = $settings->remove_data_object_of_certain_element_value(
        "mimeType",
        "image/x-adobe-dng",
        $settings->load_xml_string()
    );

    $settings->save_resource_document($filtered);
}
debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $new_resource_path); return; } fwrite($OUT, $new_resource); fclose($OUT); shell_exec("gunzip -f " . $new_resource_path); $new_resource_path = DOC_ROOT . "temp/22.xml"; $xml = file_get_contents($new_resource_path); // $xml = str_replace("<dc:description>", "<dc:description><![CDATA[", $xml); // $xml = str_replace("</dc:description>", "]]></dc:description>", $xml); $xml = preg_replace("/<a>([^<]+)<\\/a>/", "\\1", $xml); if (substr_count($xml, "<?xml") == 0) { $xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" . $xml; } $old_resource_path = CONTENT_RESOURCE_LOCAL_PATH . "22.xml"; if (!($OUT = fopen($old_resource_path, "w+"))) { debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $old_resource_path); return; } fwrite($OUT, $xml); fclose($OUT); shell_exec("rm " . $new_resource_path); $resource_id = 22; require_library('ResourceDataObjectElementsSetting'); $resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml"; $func = new ResourceDataObjectElementsSetting($resource_id, $resource_path); $xml_string = file_get_contents($resource_path); $xml = $func->remove_data_object_of_certain_element_value("mediaURL", "http://animaldiversity.ummz.umich.edu/", $xml_string); $func->save_resource_document($xml); Functions::set_resource_status_to_force_harvest($resource_id); Functions::gzip_resource_xml($resource_id);
<?php
namespace php_active_record;
/*
Connector script for resource 71: runs the Wikimedia harvester, then
post-processes the generated resource XML and flags the resource for harvest.

accesspoint_url = "http://dumps.wikimedia.org/commonswiki/latest/commonswiki-latest-pages-articles.xml.bz2";
*/

// NOTE(review): the unit of this value is not visible in this file. If it is
// microseconds, 1000000 is 1 second, not the 2 seconds the comment states - confirm.
define('DOWNLOAD_WAIT_TIME', '1000000'); // 2 second wait after every web request
include_once dirname(__FILE__) . "/../../config/environment.php";
// $GLOBALS['ENV_DEBUG'] = false;

// These two definitions had been fused into one corrupted statement; they are
// separate constants.
define("WIKI_USER_PREFIX", "http://commons.wikimedia.org/wiki/User:");
define("WIKI_PREFIX", "http://commons.wikimedia.org/wiki/");
require_vendor("wikipedia");

$resource_id = 71;
if (!Functions::can_this_connector_run($resource_id)) {
    return;
}

$w = new WikimediaHarvester(Resource::find($resource_id));
$w->begin_wikimedia_harvest("update_resources/connectors/files/");
sleep(120); // delay 2 mins.

// Post-process the resource document: filter on the "dataType" element with an
// empty value, then save the result.
require_library("ResourceDataObjectElementsSetting");
$resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
$func = new ResourceDataObjectElementsSetting($resource_id, $resource_path);

$xml_string = file_get_contents($resource_path);
if ($xml_string === false) {
    // Without this check a failed read would pass `false` on to the filter and
    // save steps and the resource would still be flagged for harvest.
    echo "\n Could not read resource file: {$resource_path} \n";
    return;
}

$xml = $func->remove_data_object_of_certain_element_value("dataType", "", $xml_string);
$func->save_resource_document($xml);
Functions::set_resource_status_to_force_harvest($resource_id);
- If needed ingests TypeInformation text dataObjects
- replaces wrong mimeType value
*/

include_once dirname(__FILE__) . "/../../config/environment.php";
require_library('ResourceDataObjectElementsSetting');

$timestart = time_elapsed();

$resource_id = 341;
$resource_path = "http://collections.mnh.si.edu/services/eol/nmnh-birds-response.xml.gz"; //Birds resource

// The accesspoint_url stored for this resource overrides the default path
// above whenever it is non-empty and differs from it.
$result = $GLOBALS['db_connection']->select("SELECT accesspoint_url FROM resources WHERE id={$resource_id}");
$row = $result->fetch_row();
$new_resource_path = $row[0];
if ($new_resource_path != '' && $new_resource_path != $resource_path) {
    $resource_path = $new_resource_path;
}

echo "\n processing resource:\n {$resource_path} \n\n";

$nmnh = new ResourceDataObjectElementsSetting(
    $resource_id,
    $resource_path,
    'http://purl.org/dc/dcmitype/StillImage',
    2
);
$xml = $nmnh->set_data_object_rating_on_xml_document();

require_library('connectors/INBioAPI');
$xml = INBioAPI::assign_eol_subjects($xml);

// mimeType "audio/wav" is first rewritten to "audio/x-wav"; then
// <dataObject>'s carrying mimeType "audio/x-wav" are excluded.
// NOTE(review): the net effect appears to be that both audio/wav and
// audio/x-wav objects are dropped - confirm that is intended.
$xml = $nmnh->replace_data_object_element_value("mimeType", "audio/wav", "audio/x-wav", $xml);
$xml = $nmnh->remove_data_object_of_certain_element_value("mimeType", "audio/x-wav", $xml);

$nmnh->save_resource_document($xml);
Functions::set_resource_status_to_force_harvest($resource_id);

// Timing report, in seconds, minutes and hours.
$elapsed_time_sec = time_elapsed() - $timestart;
echo "\n",
    "elapsed time = {$elapsed_time_sec} seconds \n",
    "elapsed time = " . ($elapsed_time_sec / 60) . " minutes \n",
    "elapsed time = " . ($elapsed_time_sec / 60 / 60) . " hours \n",
    "\n\n Done processing.";
// The path fetched into $row overrides the default resource path whenever it
// is non-empty and differs from it.
$new_resource_path = $row[0];
if ($resource_path != $new_resource_path && $new_resource_path != '') {
    $resource_path = $new_resource_path;
}

echo "\n processing resource:\n {$resource_path} \n\n";

$nmnh = new ResourceDataObjectElementsSetting($resource_id, $resource_path, 'http://purl.org/dc/dcmitype/StillImage', 2);
$xml = $nmnh->set_data_object_rating_on_xml_document();

//manual fix DATA-1189, until partner fixes their data
$xml = str_ireplace("Photograph of Photograph of", "Photograph of", $xml);

//manual fix DATA-1205
$xml = replace_Indet_sp($xml);
$xml = remove_blank_taxon_entry($xml);

require_library('connectors/INBioAPI');
$xml = INBioAPI::assign_eol_subjects($xml);

//fix DATA-1420
$xml = $nmnh->remove_data_object_of_certain_element_value("mimeType", "image/x-adobe-dng", $xml);

$nmnh->save_resource_document($xml);
Functions::set_resource_status_to_force_harvest($resource_id);

// Timing report, in seconds, minutes and hours.
$elapsed_time_sec = time_elapsed() - $timestart;
echo "\n";
echo "elapsed time = {$elapsed_time_sec} seconds \n";
echo "elapsed time = " . $elapsed_time_sec / 60 . " minutes \n";
echo "elapsed time = " . $elapsed_time_sec / 60 / 60 . " hours \n";
echo "\n\n Done processing.";

/**
 * Removes empty <taxon> entries from an XML string.
 *
 * Whitespace on either side of every tag is stripped first, so that a taxon
 * element containing only whitespace collapses to "<taxon></taxon>". Then
 * "<taxon></taxon>" and "<taxon/>" are deleted (case-insensitively).
 *
 * Note that the whitespace stripping applies to every tag in the document,
 * not only to <taxon>.
 *
 * @param string $xml the XML document as a string
 * @return string the document with blank taxon entries removed
 */
function remove_blank_taxon_entry($xml)
{
    $compact = preg_replace('/\\s*(<[^>]*>)\\s*/', '$1', $xml); // remove whitespaces
    if ($compact === null) {
        // preg_replace() returns null on a PCRE error. Passing that on would
        // make str_ireplace() return an empty string and blank the whole
        // document, so fall back to the uncompacted input instead.
        $compact = $xml;
    }

    return str_ireplace(array("<taxon></taxon>", "<taxon/>"), "", $compact);
}