Zebra_cURL is a high performance PHP library acting as a wrapper to PHP's {@link http://www.php.net/manual/en/book.curl.php libcurl library}, which not only allows the running of multiple requests at once asynchronously, in parallel, but also as soon as one thread finishes it can be processed right away without having to wait for the other threads in the queue to finish. Also, each time a request is completed another one is added to the queue, thus keeping a constant number of threads running at all times and eliminating wasted CPU cycles from busy waiting. The result is a faster and more efficient way of processing large quantities of cURL requests (like fetching thousands of RSS feeds at once), drastically reducing processing time. This script supports GET and POST requests, basic downloads, downloads from FTP servers, HTTP Authentication, and requests through proxy servers. For maximum efficiency downloads are streamed (bytes downloaded are directly written to disk) removing the unnecessary strain from the server of having to read files into memory first, and then writing them to disk. Zebra_cURL requires the {@link http://www.php.net/manual/en/curl.installation.php PHP cURL extension} to be enabled. The code is heavily commented and generates no warnings/errors/notices when PHP's error reporting level is set to {@link http://www.php.net/manual/en/function.error-reporting.php E_ALL}. Visit {@link http://stefangabos.ro/php-libraries/zebra-curl/} for more information. For more resources visit {@link http://stefangabos.ro/}
Exemplo n.º 1
0
 /**
  * Cron entry point: validates the cron secret, takes a MySQL advisory lock so
  * that only one run executes at a time, then either fans out one HTTP call per
  * site (multisite) or synchronises the entities of the current site.
  *
  * The method always terminates the request with exit once it has handed the
  * collected logs to handle_logs().
  *
  * @return void
  */
 public static function do_sync()
 {
     global $wpdb;
     $debug_mode = defined('KIGO_DEBUG') && KIGO_DEBUG;
     // Do not log into New Relic, because this function is slow and we know why
     if (extension_loaded('newrelic')) {
         newrelic_ignore_transaction();
     }
     // Check that cron is "enabled" and that the secret is correct.
     // The is_string() guards keep array-valued query parameters out of hash_equals(),
     // and hash_equals() compares the secret in constant time.
     $provided_secret = isset($_GET[self::GET_PARAM_CRON_SECRET]) ? $_GET[self::GET_PARAM_CRON_SECRET] : null;
     if (!defined('KIGO_CRON_SECRET') || !is_string(KIGO_CRON_SECRET) || !is_string($provided_secret) || !hash_equals(KIGO_CRON_SECRET, $provided_secret)) {
         self::log(array('message' => 'Missing/Invalid cron secret', 'info' => $_SERVER));
         self::handle_logs($debug_mode);
         exit;
     }
     // Ensure that no other cron will run concurrently by acquiring an advisory lock (at MySQL database)
     if (!$wpdb->get_var($wpdb->prepare('SELECT GET_LOCK(%s, 0)', self::ADV_LOCK_PROCESSING))) {
         self::log('Previous cron execution is not finished, could not acquire cron lock');
         self::handle_logs($debug_mode);
         exit;
     }
     $prevTimeTotal = microtime(true);
     if (is_multisite()) {
         require_once dirname(__FILE__) . '/ext/class-zebra-curl.php';
         // Change the default value of wp_is_large_network necessary if # of sites reach the 10000
         add_filter('wp_is_large_network', array('Kigo_Network_Cron', 'custom_wp_is_large_network'), 1, 3);
         // Initialize the list of sites
         $sites = wp_get_sites(array('limit' => self::CUSTOM_WP_IS_LARGE_NETWORK, 'deleted' => 0, 'archived' => 0));
         shuffle($sites);
         // Filter the sites, not to trigger a sync for site where the solution data have not been updated since X months
         self::filter_old_sites($sites);
         self::log(array('nb_sites' => count($sites)));
         // Do the Zebra cURL call (asynchronous calls)
         $curl = new Zebra_cURL();
         $curl->option(CURLOPT_TIMEOUT, self::CURL_TIMEOUT);
         $curl->threads = self::CURL_PARALLEL_CALLS;
         // Prepare URLs to be called; entries for which no URL string was generated are dropped
         $urls = array_map(array('Kigo_Network_Cron', 'generate_curl_urls'), $sites);
         $urls = array_filter($urls, function ($url) {
             return is_string($url);
         });
         $curl->get($urls, array('Kigo_Network_Cron', 'zebra_curl_callback'));
     } else {
         set_error_handler(array('Kigo_Network_Cron', 'php_error_handler'));
         // Add our custom handler for wp_die() because some functions die on error, and we don't want the script to die !
         add_filter('wp_die_ajax_handler', array('Kigo_Network_Cron', 'kigo_cron_wp_die_handler_filter'));
         $site_cron = new Kigo_Site_Cron();
         self::log($site_cron->sync_entities() ? true : $site_cron->_errors);
         restore_error_handler();
     }
     self::log(array('total_execution_time' => microtime(true) - $prevTimeTotal));
     // Read the value returned by RELEASE_LOCK() itself (1 = released, 0/NULL = not released).
     // $wpdb->query() would only report how many rows the SELECT produced, which is 1
     // even when the lock could not be released.
     if (!$wpdb->get_var($wpdb->prepare('SELECT RELEASE_LOCK(%s)', self::ADV_LOCK_PROCESSING))) {
         self::log('Could not release cron lock');
     }
     // Echo the logs in debug mode or send them by mail
     self::handle_logs($debug_mode);
     exit;
 }
Exemplo n.º 2
0
 /**
  * Loads the CLDR "en" and supplemental data files (downloading them first when
  * either one is missing) and initialises the territory containment tree.
  */
 function __construct()
 {
     // Single source of truth for the data folder. The download target previously used a
     // lower-case "cldr" folder while every read used "CLDR", so on a case-sensitive
     // filesystem the downloaded files were never found by the loads below.
     $cldr_dir = BASE_PATH . '/assets/data/CLDR/';
     $main_file = $cldr_dir . 'en.xml';
     $supplemental_file = $cldr_dir . 'supplementalData.xml';
     if (!file_exists($main_file) || !file_exists($supplemental_file)) {
         $curl = new Zebra_cURL();
         $curl->download(array('http://www.unicode.org/repos/cldr/tags/latest/common/main/en.xml', 'http://unicode.org/repos/cldr/trunk/common/supplemental/supplementalData.xml'), $cldr_dir);
     }
     $this->main = simplexml_load_file($main_file, 'SimpleXMLElement', LIBXML_COMPACT);
     $this->supplemental = simplexml_load_file($supplemental_file, 'SimpleXMLElement', LIBXML_COMPACT);
     // Nested containment tree: top-level code => second-level codes => lists of two-letter territory codes
     $this->territoryContainment = array('001' => array(
         '019' => array(
             '021' => array('BM', 'CA', 'GL', 'PM', 'US'),
             '013' => array('BZ', 'CR', 'GT', 'HN', 'MX', 'NI', 'PA', 'SV'),
             '029' => array('AG', 'AI', 'AW', 'BB', 'BL', 'BQ', 'BS', 'CU', 'CW', 'DM', 'DO', 'GD', 'GP', 'HT', 'JM', 'KN', 'KY', 'LC', 'MF', 'MQ', 'MS', 'PR', 'SX', 'TC', 'TT', 'VC', 'VG', 'VI'),
             '005' => array('AR', 'BO', 'BR', 'CL', 'CO', 'EC', 'FK', 'GF', 'GY', 'PE', 'PY', 'SR', 'UY', 'VE'),
         ),
         '002' => array(
             '015' => array('DZ', 'EG', 'EH', 'LY', 'MA', 'SD', 'SS', 'TN', 'EA', 'IC'),
             '011' => array('BF', 'BJ', 'CI', 'CV', 'GH', 'GM', 'GN', 'GW', 'LR', 'ML', 'MR', 'NE', 'NG', 'SH', 'SL', 'SN', 'TG'),
             '017' => array('AO', 'CD', 'CF', 'CG', 'CM', 'GA', 'GQ', 'ST', 'TD'),
             '014' => array('BI', 'DJ', 'ER', 'ET', 'KE', 'KM', 'MG', 'MU', 'MW', 'MZ', 'RE', 'RW', 'SC', 'SO', 'TZ', 'UG', 'YT', 'ZM', 'ZW'),
             '018' => array('BW', 'LS', 'NA', 'SZ', 'ZA'),
         ),
         '150' => array(
             '154' => array('GG', 'IM', 'JE', 'AX', 'DK', 'EE', 'FI', 'FO', 'GB', 'IE', 'IS', 'LT', 'LV', 'NO', 'SE', 'SJ'),
             '155' => array('AT', 'BE', 'CH', 'DE', 'FR', 'LI', 'LU', 'MC', 'NL'),
             '151' => array('BG', 'BY', 'CZ', 'HU', 'MD', 'PL', 'RO', 'RU', 'SK', 'UA'),
             '039' => array('AD', 'AL', 'BA', 'ES', 'GI', 'GR', 'HR', 'IT', 'ME', 'MK', 'MT', 'RS', 'PT', 'SI', 'SM', 'VA', 'XK'),
         ),
         '142' => array(
             '145' => array('AE', 'AM', 'AZ', 'BH', 'CY', 'GE', 'IL', 'IQ', 'JO', 'KW', 'LB', 'OM', 'PS', 'QA', 'SA', 'SY', 'TR', 'YE'),
             '143' => array('TM', 'TJ', 'KG', 'KZ', 'UZ'),
             '030' => array('CN', 'HK', 'JP', 'KP', 'KR', 'MN', 'MO', 'TW'),
             '034' => array('AF', 'BD', 'BT', 'IN', 'IR', 'LK', 'MV', 'NP', 'PK'),
             '035' => array('BN', 'ID', 'KH', 'LA', 'MM', 'MY', 'PH', 'SG', 'TH', 'TL', 'VN'),
         ),
         '009' => array(
             '053' => array('AU', 'NF', 'NZ'),
             '054' => array('FJ', 'NC', 'PG', 'SB', 'VU'),
             '057' => array('FM', 'GU', 'KI', 'MH', 'MP', 'NR', 'PW'),
             '061' => array('AS', 'CK', 'NU', 'PF', 'PN', 'TK', 'TO', 'TV', 'WF', 'WS'),
             'QO' => array('AQ', 'BV', 'CC', 'CX', 'GS', 'HM', 'IO', 'TF', 'UM', 'AC', 'CP', 'DG', 'TA'),
         ),
     ));
 }
Exemplo n.º 3
0
<?php

// load the Zebra_cURL library
require '../Zebra_cURL.php';
// the image to fetch (one of the official twitter images) and the folder that receives it
$image_url = 'https://abs.twimg.com/a/1362101114/images/resources/twitter-bird-callout.png';
$target_folder = 'cache';
// create the Zebra_cURL object and download the image into the target folder
$curl = new Zebra_cURL();
$curl->download($image_url, $target_folder);
echo 'Image downloaded - look in the "cache" folder!';
Exemplo n.º 4
0
/**
 * Handles one finished request: stops the script with a message when either
 * cURL or the server reported a problem, otherwise dumps the request's info.
 *
 * @param object $result Result object; this function reads its "response" and "info" properties.
 */
function callback($result)
{
    // something went wrong at cURL level
    // ($result still contains all data that could be gathered)
    if ($result->response[1] != CURLE_OK) {
        die('cURL responded with: ' . $result->response[0]);
    }
    // the server answered with something other than code 200 - show that code
    // see http://httpstatus.es/ for a list of possible response codes
    if ($result->info['http_code'] != 200) {
        die('Server responded with code ' . $result->info['http_code']);
    }
    // everything went well - see all the returned data
    // remember, that the "body" property of $result, unless specifically disabled in the library's constructor,
    // is run through "htmlentities()", so you may want to "html_entity_decode" it
    echo '<pre>';
    print_r($result->info);
}
// load the Zebra_cURL library
require '../Zebra_cURL.php';
// RSS feeds of some popular tech websites
$feed_urls = array(
    'http://rss1.smashingmagazine.com/feed/',
    'http://allthingsd.com/feed/',
    'http://feeds.feedburner.com/nettuts',
    'http://feeds.feedburner.com/alistapart/main',
);
// create the Zebra_cURL object and cache results for 3600 seconds in the "cache" folder
$curl = new Zebra_cURL();
$curl->cache('cache', 3600);
// fetch the feeds, handing each result to callback()
$curl->get($feed_urls, 'callback');
Exemplo n.º 5
0
<?php

/**
 * Decodes the JSON body of a finished request in place and dumps the request's info.
 *
 * @param object $result Result object; its "body" property is replaced by the decoded JSON.
 */
function callback($result)
{
    // the "body" property of $result is run through "htmlentities()", so undo that first...
    $raw_json = html_entity_decode($result->body);
    // ...then decode the json-encoded results from twitter
    $result->body = json_decode($raw_json);
    // show everything
    echo '<pre>';
    print_r($result->info);
}
// load the Zebra_cURL library
require '../Zebra_cURL.php';
// build the twitter search URL for the "jquery" hashtag
$hashtag = urlencode('#jquery');
$search_url = 'http://search.twitter.com/search.json?q=' . $hashtag;
// create the Zebra_cURL object and cache results for 60 seconds in the "cache" folder
$curl = new Zebra_cURL();
$curl->cache('cache', 60);
// run the search, handing the result to callback()
$curl->get($search_url, 'callback');
Exemplo n.º 6
0
<?php

// load the Zebra_cURL library
require '../Zebra_cURL.php';
// a random file from mozilla's public ftp server at http://ftp.mozilla.org/
$remote_file = 'http://ftp.mozilla.org/pub/mozilla.org/webtools/bugzilla-4.0-to-4.0.5-nodocs.diff.gz';
$target_folder = 'cache';
// create the Zebra_cURL object and fetch the file into the target folder
$curl = new Zebra_cURL();
$curl->ftp_download($remote_file, $target_folder);
echo 'File downloaded!';
Exemplo n.º 7
0
            } else {
                // show title and date for each entry
                foreach ($xml->entry as $entry) {
                    echo '<h2><span>' . $feeds[$result->info['original_url']] . '</span> <a href="' . $entry->link['href'] . '">' . $entry->title . '</a></h2>';
                    echo '<p>' . $entry->updated . '</p><hr>';
                }
            }
            // show the server's response code
        } else {
            die('Server responded with code ' . $result->info['http_code']);
        }
        // something went wrong
        // ($result still contains all data that could be gathered)
    } else {
        die('cURL responded with: ' . $result->response[0]);
    }
}
// load the Zebra_cURL library
require '../Zebra_cURL.php';
// feed URL => display name, for some popular tech websites
$feeds = array(
    'http://rss1.smashingmagazine.com/feed/' => 'Smashing Magazine',
    'http://feeds.feedburner.com/nettuts' => 'TutsPlus',
    'http://feeds.feedburner.com/alistapart/main' => 'A List Apart',
);
// create the Zebra_cURL object and cache results for 3600 seconds in the "cache" folder
$curl = new Zebra_cURL();
$curl->cache('cache', 3600);
// get the RSS feeds; $feeds is given as an extra argument
// (presumably forwarded to the callback so it can look up display names - confirm against the library)
$feed_urls = array_keys($feeds);
$curl->get($feed_urls, 'callback', $feeds);
?>

</body>
</html>
Exemplo n.º 8
0
<?php

// load the Zebra_cURL library
require '../Zebra_cURL.php';
// create the Zebra_cURL object and cache results for 3600 seconds in the "cache" folder
$curl = new Zebra_cURL();
$curl->cache('cache', 3600);
// a simple way of scraping a page
// (you can do more with the "get" method and callback functions)
$page = $curl->scrap('https://google.com', true);
echo $page;