/**
 * Harvest taxa from the EOL Flickr group pool, page by page.
 *
 * When $resource_file is truthy, every taxon is passed to
 * self::old_to_new_conversion() together with the archive builder and the
 * returned array stays empty. Otherwise the taxa are collected and returned.
 * Note that $resource_file itself is never written to here; it only selects
 * the mode.
 *
 * @param string $auth_token    token forwarded to the Flickr API helpers
 * @param mixed  $resource_file truthy to convert taxa into the content archive
 *                              instead of returning them
 * @return array taxa collected from all fetched pages (empty in archive mode
 *               or when the pool metadata request fails)
 */
public static function get_all_eol_photos($auth_token = "", $resource_file = null)
 {
     self::create_cache_path();
     $all_taxa = array();
     // Harvest limits: at most 20 pages of 100 photos each.
     // NOTE(review): these caps were hard-coded overrides in the original code;
     // confirm whether the full pool is meant to be harvested.
     $per_page = 100;
     $max_pages = 20;
     // Single-photo request, made only to read the pool's total photo count.
     $response = self::pools_get_photos(FLICKR_EOL_GROUP_ID, "", 1, 1, $auth_token);
     if (!$response || !isset($response->photos->total)) {
         return $all_taxa;
     }
     // Never request more pages than the pool actually has at this page size.
     $total = (int) $response->photos->total;
     $total_pages = (int) min($max_pages, ceil($total / $per_page));
     require_vendor('eol_content_schema_v2');
     $archive_builder = new \eol_schema\ContentArchiveBuilder(array('directory_path' => DOC_ROOT . "/temp/flickr_dwc/"));
     for ($i = 1; $i <= $total_pages; $i++) {
         echo "getting page {$i}: " . time_elapsed() . "\n";
         $page_taxa = self::get_eol_photos($per_page, $i, $auth_token);
         if (!$page_taxa) {
             continue;
         }
         foreach ($page_taxa as $t) {
             if ($resource_file) {
                 self::old_to_new_conversion($t, $archive_builder);
             } else {
                 $all_taxa[] = $t;
             }
         }
     }
     $archive_builder->finalize();
     return $all_taxa;
 }
示例#2
0
 /**
  * Fetch a Google spreadsheet by title and return its cells as
  * $sheet[column_number][row_number], both 1-based.
  *
  * Column numbers follow the order in which columns first appear in the
  * worksheet feed. Cells missing from the feed are returned as null.
  *
  * Recognised $options keys:
  *  - spreadsheet_title (required): title of the spreadsheet to load
  *  - number_of_columns_to_return: "all" (default) or the number of leading
  *    columns to return
  *  - column_number_to_return: false (default) or a single column number;
  *    only consulted when number_of_columns_to_return does not cut the
  *    result short first
  *  - timeout: passed to the API calls (default 100)
  *  - google_username / google_password: default to the GOOGLE_USERNAME /
  *    GOOGLE_PASSWORD globals
  *
  * @param array $options see above
  * @return array|false|null the sheet (or one column of it); false when the
  *                          title option is missing, the API returns nothing,
  *                          or no spreadsheet matches; null when the requested
  *                          single column does not exist
  */
 public function get_google_spreadsheet($options)
 {
     if (!isset($options["spreadsheet_title"])) {
         debug("[spreadsheet_title] is a required paramemter \n");
         return false;
     }
     require_vendor('google_api');
     if (!isset($options["number_of_columns_to_return"])) {
         $options["number_of_columns_to_return"] = "all";
     }
     if (!isset($options["column_number_to_return"])) {
         $options["column_number_to_return"] = false;
     }
     if (!isset($options["timeout"])) {
         $options["timeout"] = 100;
     }
     if (!isset($options["google_username"])) {
         $options["google_username"] = $GLOBALS['GOOGLE_USERNAME'];
     }
     if (!isset($options["google_password"])) {
         $options["google_password"] = $GLOBALS['GOOGLE_PASSWORD'];
     }
     // parameters for the google_api
     $params = array("timeout" => $options["timeout"]);
     $spreadsheet_tables_api = new \google_api\GoogleSpreadsheetsAPI($options["google_username"], $options["google_password"], @$_SESSION['GOOGLE_AUTH_TOKEN'], '', $params);
     $response = $spreadsheet_tables_api->get_spreadsheets($params);
     // A failed request must not blow up on ->entry below.
     if (!$response || !isset($response->entry)) {
         return false;
     }
     foreach ($response->entry as $spreadsheet_entry) {
         if ((string) $spreadsheet_entry->title !== (string) $options["spreadsheet_title"]) {
             continue;
         }
         $URL_for_spreadsheet = $spreadsheet_entry->content['src'];
         $spreadsheet_response = $spreadsheet_tables_api->get_response($URL_for_spreadsheet, $params);
         $sheet_url = $spreadsheet_response->entry->link[0]['href'];
         $worksheet_response = $spreadsheet_tables_api->get_response($sheet_url, $params);

         // Group cells by column letters. Titles are expected to look like
         // "A1" or "AA12"; anything else cannot be placed on the grid.
         $cols = array();
         $max_row = 0;
         foreach ($worksheet_response->entry as $cell) {
             if (!preg_match('/^([A-Za-z]+)([0-9]+)$/', trim((string) $cell->title), $parts)) {
                 continue;
             }
             $row = (int) $parts[2];
             $cols[$parts[1]][$row] = $cell->content;
             // Track the highest row index rather than the cell count, so
             // columns with gaps do not lose their trailing rows.
             if ($row > $max_row) {
                 $max_row = $row;
             }
         }

         $sheet = array();
         $col_count = 0;
         foreach ($cols as $cells) {
             $col_count++;
             for ($i = 1; $i <= $max_row; $i++) {
                 $sheet[$col_count][$i] = isset($cells[$i]) ? $cells[$i] : null;
             }
             if ($options["number_of_columns_to_return"] != "all") {
                 if ($col_count >= $options["number_of_columns_to_return"]) {
                     return $sheet;
                 }
             }
         }
         if ($options["column_number_to_return"]) {
             $wanted = $options["column_number_to_return"];
             // Same null result as before for a missing column, minus the notice.
             return isset($sheet[$wanted]) ? $sheet[$wanted] : null;
         }
         return $sheet;
     }
     return false;
 }
示例#3
0
<?php

// Downloads the Plazi taxonx_docs listing, stores it locally, and collects the
// names of the listed resources into $file_names (name => 1).
define('USING_SPM', true);
include_once dirname(__FILE__) . "/../../config/environment.php";
// require_vendor('rdfapi-php');
require_vendor('rdf');
$download_cache_path = DOC_ROOT . "temp/plazi.xml";
$new_resource_path = DOC_ROOT . "temp/30.xml";
$new_resource_xml = Functions::get_remote_file("http://plazi.cs.umb.edu/exist/rest/db/taxonx_docs");
if (!($OUT = fopen($new_resource_path, "w+"))) {
    // __CLASS__ is empty at script level, so identify the script by file name.
    debug(basename(__FILE__) . ":" . __LINE__ . ": Couldn't open file: " . $new_resource_path);
    return;
}
fwrite($OUT, $new_resource_xml);
fclose($OUT);
unset($new_resource_xml);
$prefix = "http://plazi.cs.umb.edu/exist/rest/db/taxonx_docs/getSPM.xq?render=xhtml&description=broad&associations=no&doc=";
$all_taxa = array();
$file_names = array();
$xml = simplexml_load_file($new_resource_path);
if ($xml === false) {
    // An empty or malformed download would otherwise be fatal on ->children().
    debug(basename(__FILE__) . ":" . __LINE__ . ": Couldn't parse XML file: " . $new_resource_path);
    return;
}
// The listing's elements live in the eXist namespace.
$xml_exist = $xml->children("http://exist.sourceforge.net/NS/exist");
foreach ($xml_exist->collection as $collection) {
    $collection_exist = $collection->children("http://exist.sourceforge.net/NS/exist");
    foreach ($collection_exist->resource as $resource) {
        $attributes = array();
        foreach ($resource->attributes() as $a => $b) {
            $attributes[$a] = (string) $b;
        }
        // Skip resources without a usable name instead of recording an empty key.
        $name = isset($attributes["name"]) ? trim($attributes["name"]) : "";
        if ($name === "") {
            continue;
        }
        $file_names[$name] = 1;
    }
}
示例#4
0
<?php

namespace php_active_record;

// Entry script: runs the Wikipedia harvest if connector "80" is allowed to run.
define('DOWNLOAD_WAIT_TIME', '1000000');
// 1 second wait after every web request
include_once dirname(__FILE__) . "/../../config/environment.php";
if (!Functions::can_this_connector_run("80")) {
    return;
}
// NOTE(review): the original line here was garbled into a parse error
// ("...User:"******"wikipedia");). It is reconstructed as the define plus the
// vendor load; any statements lost inside the masked span must be restored
// from the upstream source.
define("WIKI_USER_PREFIX", "http://en.wikipedia.org/wiki/User:");
require_vendor("wikipedia");
//$GLOBALS['ENV_DEBUG'] = false;
$harvester = new WikipediaHarvester();
$harvester->begin_wikipedia_harvest();