Example #1
0
     // $xml = str_replace(array(chr(0x73), chr(0x20), chr(0x68), chr(0x61)), " ", $xml);
     $xml = str_replace(array(0x73, 0x20, 0x73, 0x6b), " ", $xml);
     $xml = str_replace(array(0x32, 0x35, 0x2e, 0x35), " ", $xml);
     $xml = str_replace(array(0x32, 0x33, 0x20, 0x6d), " ", $xml);
     $xml = str_replace(chr(0x32) . " " . chr(0x33) . " " . chr(0x20) . " " . chr(0x6d), " ", $xml);
     $xml = str_replace(array(0x20, 0x4e, 0x61, 0x74), " ", $xml);
     $xml = str_replace(array(0x73, 0x20, 0x68, 0x6f), " ", $xml);
     $xml = str_replace(chr(0x73) . " " . chr(0x20) . " " . chr(0x68) . " " . chr(0x6f), " ", $xml);
     $xml = str_replace(array(0x77, 0x65, 0x72, 0x65), " ", $xml);
     $xml = str_replace(array(0xe2, 0x80, 0xc2, 0xa6), " ", $xml);
     $xml = str_replace(array(0x6e, 0x20, 0x32, 0x30), " ", $xml);
     $xml = str_replace(array(0x67, 0x75, 0x65, 0x7a), " ", $xml);
     $xml = str_replace(array(0x73, 0x20, 0x61, 0x6e), " ", $xml);
     $xml = str_replace(array(0x74, 0x7a, 0x3c, 0x2f), " ", $xml);
     // 0x74 0x7A 0x20 0x77
     $xml = format_utf8($xml);
     $xml = mb_convert_encoding($xml, "UTF-8", mb_detect_encoding($xml, "UTF-8, ISO-8859-1, ISO-8859-15", true));
     //
     $xml = str_replace(array(0x74, 0x7a, 0x3c, 0x2f), " ", $xml);
     $xml = str_replace(array(0x74, 0x7a, 0x20, 0x77), " ", $xml);
     // 0x73 0x20 0x68 0x6F
     if (!($OUT = Functions::file_open($xml_path, "w"))) {
         return;
     }
     fwrite($OUT, $xml);
     fclose($OUT);
     echo "\nSaved [{$xml_path}]...\n";
 }
 //--------
 // Post-run step: pass the resource id to Functions::gzip_resource_xml().
 // NOTE(review): the helper is defined elsewhere; presumably it gzips the resource XML - confirm.
 Functions::gzip_resource_xml($resource_id);
 // Seconds elapsed since $timestart (expected to be set earlier, outside this excerpt);
 // microtime() with a truthy argument returns the current Unix time as a float.
 $elapsed_time_sec = microtime(1) - $timestart;
Example #2
0
/**
 * Builds the resource XML for the AmphibiaWeb content partner.
 *
 * Steps:
 *  1. Fetch the partner's species dump through Functions::lookup_with_cache().
 *  2. Replace a few typographic characters and store the dump in a temp file
 *     under DOC_ROOT . "temp/".
 *  3. Parse the temp file and turn every <species> record that has a
 *     <submittedby> value into a \SchemaTaxon with its text data objects.
 *  4. Serialise all taxa with \SchemaDocument::get_taxon_xml() and write the
 *     result to CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml".
 *
 * Progress and failure messages are echoed; nothing is returned. When the dump
 * cannot be fetched, written or parsed the function returns early and leaves
 * the existing resource file untouched.
 *
 * @param mixed $resource_id Resource identifier, used verbatim in file names.
 * @return void
 */
function start($resource_id)
{
    $new_resource_path = DOC_ROOT . "temp/" . $resource_id . ".xml";
    // $file = 'http://localhost/cp/Amphibiaweb/amphib_dump.xml';
    $file = 'http://amphibiaweb.org/amphib_dump.xml';
    $download_options = array('timeout' => 1200, 'download_attempts' => 5, 'expire_seconds' => 86400);
    if (!($new_resource_xml = Functions::lookup_with_cache($file, $download_options))) {
        echo "\n\n Content partner's server is down, connector will now terminate.\n";
        return;
    }

    // These may look like the same wrong characters - but they are several different wrong characters
    $new_resource_xml = str_replace(array("“", "”", "–"), array("\"", "\"", "-"), $new_resource_xml);

    if (!($OUT = Functions::file_open($new_resource_path, "w+"))) {
        return;
    }
    fwrite($OUT, $new_resource_xml);
    fclose($OUT);
    unset($new_resource_xml);

    $xml = simplexml_load_file($new_resource_path);
    // The document is fully parsed into memory at this point, so the temp copy
    // can be removed right away. Doing it here (instead of via `rm` at the very
    // end) also cleans up on every early-return path and avoids the shell.
    if (file_exists($new_resource_path)) {
        unlink($new_resource_path);
    }
    if ($xml === false) {
        // Do not overwrite the published resource with an empty document.
        echo "\n\n Could not parse [{$new_resource_path}] as XML, connector will now terminate.\n";
        return;
    }

    $taxa = array();
    $total = count($xml->species);
    $i = 0;
    foreach ($xml->species as $species) {
        $i++;
        if ($i % 1000 == 0) {
            echo "\n {$i} of {$total} ";
        }

        $amphibID = (int) trim($species->amphib_id);
        $genus = format_utf8((string) trim($species->genus));
        $speciesName = format_utf8((string) trim($species->species));
        $order = format_utf8((string) trim($species->ordr));
        $family = format_utf8((string) trim($species->family));
        $commonNames = format_utf8((string) trim($species->common_name));
        $submittedBy = format_utf8((string) trim($species->submittedby));
        $editedBy = format_utf8((string) trim($species->editedby));
        $description = format_utf8((string) trim($species->description));
        $distribution = format_utf8((string) trim($species->distribution));
        $life_history = format_utf8((string) trim($species->life_history));
        $trends_and_threats = format_utf8((string) trim($species->trends_and_threats));
        $relation_to_humans = format_utf8((string) trim($species->relation_to_humans));
        $comments = format_utf8((string) trim($species->comments));
        $ref = format_utf8((string) trim($species->refs));

        // References are separated by "<p>" in the dump; drop blank pieces
        // (an empty <refs> would otherwise yield one empty reference).
        $refs = array();
        foreach (explode("<p>", $ref) as $r) {
            $r = trim($r);
            if ($r === "") {
                continue;
            }
            $refs[] = array("fullReference" => $r);
        }

        $description = fix_article($description);
        $distribution = fix_article($distribution);
        $life_history = fix_article($life_history);
        $trends_and_threats = fix_article($trends_and_threats);
        $relation_to_humans = fix_article($relation_to_humans);
        $comments = fix_article($comments);

        $pageURL = "http://amphibiaweb.org/cgi/amphib_query?where-genus=" . $genus . "&where-species=" . $speciesName . "&account=amphibiaweb";

        // Records without a submitter are not published.
        if (!$submittedBy) {
            continue;
        }

        // Every name in "A, B and C" becomes an author agent.
        // (foreach replaces each(), which no longer exists in PHP 8.)
        $agents = array();
        foreach (preg_split("/(,| and )/", $submittedBy) as $val) {
            $val = trim($val);
            if (!$val) {
                continue;
            }
            $agents[] = new \SchemaAgent(array("role" => "author", "fullName" => $val));
        }

        $taxonParameters = array();
        $taxonParameters["identifier"] = $amphibID;
        $taxonParameters["source"] = $pageURL;
        $taxonParameters["kingdom"] = "Animalia";
        $taxonParameters["phylum"] = "Chordata";
        $taxonParameters["class"] = "Amphibia";
        $taxonParameters["order"] = $order;
        $taxonParameters["family"] = $family;
        $taxonParameters["scientificName"] = trim($genus . " " . $speciesName);

        // Comma-separated list; trim each entry and skip blanks so a record
        // without common names does not get an empty-named entry.
        $taxonParameters["commonNames"] = array();
        foreach (explode(",", $commonNames) as $common_name) {
            $common_name = trim($common_name);
            if ($common_name === "") {
                continue;
            }
            $taxonParameters["commonNames"][] = new \SchemaCommonName(array("name" => $common_name, "language" => "en"));
        }

        $dataObjects = array();
        if ($distribution) {
            $dataObjects[] = get_data_object($amphibID . "_distribution", "Distribution and Habitat", $distribution, "http://rs.tdwg.org/ontology/voc/SPMInfoItems#Distribution", $refs, $agents, $pageURL);
        }
        if ($life_history) {
            $dataObjects[] = get_data_object($amphibID . "_life_history", "Life History, Abundance, Activity, and Special Behaviors", $life_history, "http://rs.tdwg.org/ontology/voc/SPMInfoItems#Trends", $refs, $agents, $pageURL);
        }
        if ($trends_and_threats) {
            // NOTE(review): this reuses the life-history title for the threats
            // text - looks like a copy-paste slip; confirm the intended title.
            $dataObjects[] = get_data_object($amphibID . "_trends_threats", "Life History, Abundance, Activity, and Special Behaviors", $trends_and_threats, "http://rs.tdwg.org/ontology/voc/SPMInfoItems#Threats", $refs, $agents, $pageURL);
        }
        if ($relation_to_humans) {
            $dataObjects[] = get_data_object($amphibID . "_relation_to_humans", "Relation to Humans", $relation_to_humans, "http://rs.tdwg.org/ontology/voc/SPMInfoItems#RiskStatement", $refs, $agents, $pageURL);
        }

        // Comments are published as part of the description. Appending covers
        // both cases: description + comments, and comments alone when there is
        // no description (previously that second case was unreachable because
        // the else was attached to the inner if).
        if ($comments != "") {
            $description .= $comments;
        }
        if ($description) {
            $dataObjects[] = get_data_object($amphibID . "_description", "Description", $description, "http://rs.tdwg.org/ontology/voc/SPMInfoItems#GeneralDescription", $refs, $agents, $pageURL);
        }

        $taxonParameters["dataObjects"] = array();
        foreach ($dataObjects as $dataObjectParameters) {
            $taxonParameters["dataObjects"][] = new \SchemaDataObject($dataObjectParameters);
        }

        $taxa[] = new \SchemaTaxon($taxonParameters);
        //if($i >= 5) break; //debug
    }

    $new_resource_xml = \SchemaDocument::get_taxon_xml($taxa);
    $old_resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
    if (!($OUT = Functions::file_open($old_resource_path, "w+"))) {
        return;
    }
    fwrite($OUT, $new_resource_xml);
    fclose($OUT);
    // Functions::set_resource_status_to_force_harvest($resource_id);
    // Functions::gzip_resource_xml($resource_id);
}