/**
 * Harvests the photos of the EOL Flickr group pool, one page at a time.
 *
 * A first call to self::pools_get_photos() is made only to read the pool's
 * total photo count; that count determines how many pages of $per_page photos
 * are then requested through self::get_eol_photos().
 *
 * When $resource_file is truthy, every harvested taxon is handed to
 * self::old_to_new_conversion() together with the content archive builder and
 * nothing is accumulated in memory. $resource_file is used purely as a flag
 * here; nothing is written to it. Otherwise the taxa are collected and returned.
 *
 * The archive builder (writing under DOC_ROOT . "/temp/flickr_dwc/") is created
 * and finalized in both modes, exactly as before.
 *
 * @param string $auth_token    Auth token forwarded to every Flickr API call.
 * @param mixed  $resource_file When truthy, taxa go to the archive builder
 *                              instead of the returned array.
 * @return array Harvested taxa; empty when $resource_file is truthy or when the
 *               pool metadata could not be read.
 */
public static function get_all_eol_photos($auth_token = "", $resource_file = null)
{
    self::create_cache_path();
    $all_taxa = array();
    $per_page = 500;

    // Get metadata about the EOL Flickr pool
    // (the "1, 1" arguments are presumably per_page / page, i.e. a minimal request - TODO confirm)
    $response = self::pools_get_photos(FLICKR_EOL_GROUP_ID, "", 1, 1, $auth_token);
    if (!$response || !isset($response->photos->total)) {
        return $all_taxa;
    }

    // number of API calls to be made; derived from the real pool size instead of
    // the former hard-coded "20 pages of 100 photos" debugging override
    $total = (int) $response->photos->total;
    $total_pages = (int) ceil($total / $per_page);

    require_vendor('eol_content_schema_v2');
    $archive_builder = new \eol_schema\ContentArchiveBuilder(array('directory_path' => DOC_ROOT . "/temp/flickr_dwc/"));

    for ($i = 1; $i <= $total_pages; $i++) {
        echo "getting page {$i}: " . time_elapsed() . "\n";

        $page_taxa = self::get_eol_photos($per_page, $i, $auth_token);
        if (!$page_taxa) {
            continue;
        }

        foreach ($page_taxa as $t) {
            if ($resource_file) {
                self::old_to_new_conversion($t, $archive_builder);
            } else {
                $all_taxa[] = $t;
            }
        }
    }

    $archive_builder->finalize();

    return $all_taxa;
}
/**
 * Looks up a Google spreadsheet by title and returns its cell values.
 *
 * The result is indexed as $sheet[col][row]; both indexes start at 1. Columns
 * are numbered in spreadsheet order (A, B, ..., Z, AA, ...) over the column
 * letters that actually occur in the cell entries, so a column letter that
 * never occurs does not get a number of its own. Rows run from 1 to the
 * highest row number found; cells that are absent are returned as null.
 *
 * Recognised keys of $options:
 *  - "spreadsheet_title"            (required) title of the spreadsheet to read
 *  - "number_of_columns_to_return"  "all" (default) or the number of columns to
 *                                   return, counted from left to right
 *  - "column_number_to_return"      false (default) or a 1-based column number;
 *                                   when set, only that column's [row] array is
 *                                   returned
 *  - "timeout"                      passed to the google_api calls (default 100)
 *  - "google_username" / "google_password"
 *                                   default to $GLOBALS['GOOGLE_USERNAME'] and
 *                                   $GLOBALS['GOOGLE_PASSWORD']
 *
 * "number_of_columns_to_return" takes precedence over "column_number_to_return"
 * once the requested number of columns has been collected.
 *
 * @param array $options See the key list above.
 * @return array|false|null The sheet (or a single column); false when the title
 *                          is missing from $options or no spreadsheet with that
 *                          title is found; null when the requested single
 *                          column does not exist.
 */
public function get_google_spreadsheet($options)
{
    if (!isset($options["spreadsheet_title"])) {
        debug("[spreadsheet_title] is a required paramemter \n");
        return false;
    }
    require_vendor('google_api');

    // Fill in defaults for every option the caller left unset.
    $defaults = array(
        "number_of_columns_to_return" => "all", // use this to return the no. of columns from left to right
        "column_number_to_return" => false,     // use this to return a single column
        "timeout" => 100,
    );
    foreach ($defaults as $key => $value) {
        if (!isset($options[$key])) {
            $options[$key] = $value;
        }
    }
    // Credentials are read from $GLOBALS only when actually needed.
    if (!isset($options["google_username"])) {
        $options["google_username"] = $GLOBALS['GOOGLE_USERNAME'];
    }
    if (!isset($options["google_password"])) {
        $options["google_password"] = $GLOBALS['GOOGLE_PASSWORD'];
    }

    $params = array("timeout" => $options["timeout"]); // parameters for the google_api
    $auth_token = isset($_SESSION['GOOGLE_AUTH_TOKEN']) ? $_SESSION['GOOGLE_AUTH_TOKEN'] : null;
    $spreadsheet_tables_api = new \google_api\GoogleSpreadsheetsAPI($options["google_username"], $options["google_password"], $auth_token, '', $params);

    $response = $spreadsheet_tables_api->get_spreadsheets($params);
    if (!is_object($response) || !isset($response->entry)) {
        return false;
    }

    foreach ($response->entry as $spreadsheet) {
        if ($spreadsheet->title != $options["spreadsheet_title"]) {
            continue;
        }

        // Follow the spreadsheet entry to its first linked feed, whose entries
        // are titled with cell references such as "B7".
        $spreadsheet_response = $spreadsheet_tables_api->get_response($spreadsheet->content['src'], $params);
        $sheet_url = $spreadsheet_response->entry->link[0]['href'];
        $worksheet_response = $spreadsheet_tables_api->get_response($sheet_url, $params);

        // Group cell contents as $cols[column letters][row number].
        $cols = array();
        $max_row = 0;
        foreach ($worksheet_response->entry as $cell) {
            // Split the reference into letters and digits so that columns past
            // "Z" (e.g. "AA12") are not truncated to their first letter.
            if (!preg_match('/^([A-Za-z]+)([0-9]+)$/', trim((string) $cell->title), $reference)) {
                continue; // not a cell reference
            }
            $row = (int) $reference[2];
            $cols[strtoupper($reference[1])][$row] = $cell->content;
            // Track the highest row number rather than the number of cells, so
            // rows after a missing cell are not cut off.
            if ($row > $max_row) {
                $max_row = $row;
            }
        }

        // Order the columns as in the spreadsheet: shorter references first
        // ("Z" before "AA"), then alphabetically.
        uksort($cols, function ($a, $b) {
            if (strlen($a) != strlen($b)) {
                return strlen($a) < strlen($b) ? -1 : 1;
            }
            return strcmp($a, $b);
        });

        $sheet = array(); // to be returned
        $col_count = 0;
        foreach ($cols as $rows) {
            $col_count++;
            for ($i = 1; $i <= $max_row; $i++) {
                $sheet[$col_count][$i] = isset($rows[$i]) ? $rows[$i] : null;
            }
            if ($options["number_of_columns_to_return"] != "all") {
                if ($col_count >= $options["number_of_columns_to_return"]) {
                    return $sheet;
                }
            }
        }

        if ($options["column_number_to_return"]) {
            $wanted = $options["column_number_to_return"];
            return isset($sheet[$wanted]) ? $sheet[$wanted] : null;
        }
        return $sheet;
    }

    return false;
}
<?php
/*
 * Plazi connector, first stage as visible here:
 *  1. downloads the taxonx_docs collection listing and stores it in temp/30.xml;
 *  2. parses that file and records the "name" attribute of every listed
 *     resource as a key of $file_names.
 *
 * $download_cache_path, $prefix and $all_taxa are only initialised in this part
 * of the script; they are presumably consumed further down - verify before
 * removing them.
 */
define('USING_SPM', true);
include_once dirname(__FILE__) . "/../../config/environment.php";
// require_vendor('rdfapi-php');
require_vendor('rdf');

$download_cache_path = DOC_ROOT . "temp/plazi.xml";
$new_resource_path = DOC_ROOT . "temp/30.xml";

// Fetch the collection listing and keep a copy on disk; it is parsed from that file below.
$new_resource_xml = Functions::get_remote_file("http://plazi.cs.umb.edu/exist/rest/db/taxonx_docs");
if (!($OUT = fopen($new_resource_path, "w+"))) {
    debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $new_resource_path);
    return;
}
fwrite($OUT, $new_resource_xml);
fclose($OUT);
unset($new_resource_xml);

$prefix = "http://plazi.cs.umb.edu/exist/rest/db/taxonx_docs/getSPM.xq?render=xhtml&description=broad&associations=no&doc=";
$all_taxa = array();
$file_names = array();

$xml = simplexml_load_file($new_resource_path);
if ($xml === false) {
    // An empty or malformed download cannot be parsed; stop here instead of
    // failing with a fatal error on the ->children() call below.
    debug(__CLASS__ . ":" . __LINE__ . ": Couldn't parse XML file: " . $new_resource_path);
    return;
}

// The listing's elements live in the eXist namespace.
$exist_namespace = "http://exist.sourceforge.net/NS/exist";
$xml_exist = $xml->children($exist_namespace);
foreach ($xml_exist->collection as $collection) {
    $collection_exist = $collection->children($exist_namespace);
    foreach ($collection_exist->resource as $resource) {
        // attributes() without arguments yields the un-namespaced attributes.
        $attributes = $resource->attributes();
        if (!isset($attributes["name"])) {
            continue; // nothing to record for a resource without a name
        }
        // Names are stored as keys, so repeated names collapse into one entry.
        $file_names[trim((string) $attributes["name"])] = 1;
    }
}
<?php
/*
 * Connector script that starts the Wikipedia harvest: it loads the project
 * environment, checks whether the connector may run, and then calls
 * WikipediaHarvester::begin_wikipedia_harvest().
 */
namespace php_active_record;

define('DOWNLOAD_WAIT_TIME', '1000000'); // 1 second wait after every web request
include_once dirname(__FILE__) . "/../../config/environment.php";

// Stop immediately unless Functions::can_this_connector_run() allows it
// ("80" is presumably this connector's resource id - TODO confirm).
if (!Functions::can_this_connector_run("80")) { return; }

// NOTE(review): the text between the WIKI_USER_PREFIX value and "wikipedia" has been
// masked with asterisks; as written this statement is not valid PHP. The tail looks
// like the end of a separate call taking "wikipedia" as its argument - restore the
// original statements from version control before running this script.
define("WIKI_USER_PREFIX", "http://en.wikipedia.org/wiki/User:"******"wikipedia");
//$GLOBALS['ENV_DEBUG'] = false;

// Run the harvest.
$harvester = new WikipediaHarvester();
$harvester->begin_wikipedia_harvest();