Beispiel #1
0
 /**
  * Collects all pages of a multipage endpoint into a single array and caches the combined result.
  *
  * The combined array is looked up in the cache first. Only on a cache miss is gather() invoked with the given
  * callbacks; its result is then wrapped in a cacheable array object and stored. This extra cache layer exists
  * because gathering and reindexing multipage data is expensive even when the individual CREST responses are
  * already cached.
  *
  * @param string $endpointHref the URL to the first page of the endpoint
  * @param callable $indexFunc function used to extract the ID from/for an individual response item
  * @param callable $elementFunc function used to extract the desired data from an individual response item
  * @param string $accept the representation to request from CREST
  * @param int $ttl the time to live to be used in the cache
  * @param string $subCommandKey optional extra key component, to avoid cache namespace collisions when different
  * gather requests access the same endpoint URL
  *
  * @return array
  */
 public function gatherCached($endpointHref, callable $indexFunc = null, callable $elementFunc = null, $accept = null, $ttl = 15, $subCommandKey = null)
 {
     //cache key: the endpoint URL, optionally suffixed with the subcommand key
     $cacheKey = 'gathered:' . $endpointHref;
     if ($subCommandKey !== null) {
         $cacheKey .= ',' . $subCommandKey;
     }

     try {
         $gathered = $this->cache->getItem($cacheKey);
     } catch (Exceptions\KeyNotFoundInCacheException $e) {
         //cache miss: build a cacheable array object holding all pages joined into one compact array
         $arrayClass = Config::getIveeClassName('CacheableArray');
         $gathered = new $arrayClass($cacheKey, $ttl);
         $gathered->data = $this->gather($endpointHref, $indexFunc, $elementFunc, $accept);
         $this->cache->setItem($gathered);
     }

     return $gathered->data;
 }
Beispiel #2
0
 /**
  * Performs parallel asynchronous GET requests.
  *
  * Hrefs whose response is already in the cache are passed to $callback right away. The remaining hrefs are
  * requested through a CURL multi handle using a rolling window of at most 10 concurrent requests. When an
  * $errCallback is given, an href answering with a non-200 status is reported to it and put back on the queue,
  * up to a fixed number of times per href. Progress lines are echoed for every processed href.
  *
  * @param array $hrefs the hrefs to request
  * @param array $header the header to be passed in all requests
  * @param callable $getAuthHeader that returns an appropriate bearer authentication header line, for instance
  * Client::getBearerAuthHeader(). We do this on-the-fly as during large multi GET batches the access token might
  * expire.
  * @param callable $callback a function expecting one \iveeCrest\Response object as argument, called for every
  * successful response
  * @param callable $errCallback a function expecting one \iveeCrest\Response object as argument, called for every
  * non-successful response
  * @param bool $cache whether the Responses should be cached
  *
  * @return void
  * @throws \iveeCrest\Exceptions\IveeCrestException on general CURL error
  */
 public function asyncMultiGet(array $hrefs, array $header, callable $getAuthHeader, callable $callback, callable $errCallback = null, $cache = true)
 {
     //upper bound on how often a single href is put back on the queue after a non-200 response; without it a
     //permanently failing href would be retried forever
     $maxRequeues = 10;
     $requeueCounts = array();
     $baseUrl = Config::getCrestBaseUrl();

     //separate hrefs that are already cached from those that need to be requested
     $hrefsToQuery = array();
     foreach ($hrefs as $href) {
         try {
             $callback($this->cache->getItem('get:' . $href));
         } catch (Exceptions\KeyNotFoundInCacheException $e) {
             //not in cache, has to be requested
             $hrefsToQuery[] = $href;
             continue;
         }
         echo time2s() . "cache " . str_replace($baseUrl, '', $href) . "\n";
     }

     //everything was served from cache, no need to set up CURL at all
     if (count($hrefsToQuery) == 0) {
         return;
     }

     // make sure the rolling window isn't greater than the number of hrefs
     $rollingWindow = min(10, count($hrefsToQuery));
     //CURL options for all requests
     $stdOptions = array(CURLOPT_RETURNTRANSFER => true, CURLOPT_USERAGENT => $this->userAgent, CURLOPT_SSL_VERIFYPEER => true, CURLOPT_SSL_CIPHER_LIST => 'TLSv1', CURLOPT_CAINFO => __DIR__ . '/cacert.pem', CURLOPT_HTTPHEADER => $header);
     $responses = array();
     $master = curl_multi_init();

     //setup the first batch of requests; $next is the index of the next href to be started
     for ($next = 0; $next < $rollingWindow; $next++) {
         $href = $hrefsToQuery[$next];
         $responses[$href] = $this->addHandleToMulti($master, $href, $stdOptions, $getAuthHeader, $header);
     }

     $running = 0;
     do {
         //$running only reflects the state at the last curl_multi_exec() call. Handles added afterwards are not
         //counted in it, so we track them separately to avoid leaving the loop with unprocessed requests.
         $handlesAdded = false;

         //execute whichever handles need to be started
         do {
             $execrun = curl_multi_exec($master, $running);
         } while ($execrun == CURLM_CALL_MULTI_PERFORM);
         if ($execrun != CURLM_OK) {
             //release the multi handle before bailing out
             curl_multi_close($master);
             $crestExceptionClass = Config::getIveeClassName('IveeCrestException');
             throw new $crestExceptionClass("CURL Multi-GET error", $execrun);
         }

         //block until we have anything on at least one of the handles
         curl_multi_select($master);

         //a request returned, process it
         while ($done = curl_multi_info_read($master)) {
             $info = curl_getinfo($done['handle']);
             $url = $info['url'];
             //find the Response object matching the URL
             $res = $responses[$url];
             $url_short = str_replace($baseUrl, '', $url);

             //set info and content to Response object
             $res->setInfo($info);
             $res->setContent(curl_multi_getcontent($done['handle']));

             //execute the callbacks passing the response as argument
             if ($info['http_code'] == 200) {
                 //cache it if configured
                 if ($cache) {
                     $this->cache->setItem($res);
                 }
                 $callback($res);
                 if (isset($this->requeued[$url])) {
                     $this->requeued[$url] = null;
                     echo time2s() . ">>> recaptured " . $url_short . "\n";
                 }
                 echo time2s() . "got   " . $url_short . "\n";
             } elseif (isset($errCallback)) {
                 $attempts = isset($requeueCounts[$url]) ? $requeueCounts[$url] : 0;
                 $retry = $attempts < $maxRequeues;
                 echo time2s() . "cw.asyncMultiGet(): curl_multi, http " . $info['http_code'] . "\n";
                 echo time2s() . ($retry ? "requeueing " : "giving up on ") . $url_short . "\n";
                 $errCallback($res);
                 if ($retry) {
                     //put back on queue
                     $requeueCounts[$url] = $attempts + 1;
                     $hrefsToQuery[] = $url;
                     $this->requeued[$url] = true;
                 }
             }

             //remove the reference to response to conserve memory on large batches
             $responses[$url] = null;

             //start a new request (it's important to do this before removing the old one)
             if ($next < count($hrefsToQuery)) {
                 $href = $hrefsToQuery[$next++];
                 $responses[$href] = $this->addHandleToMulti($master, $href, $stdOptions, $getAuthHeader, $header);
                 $handlesAdded = true;
             }

             //remove the curl handle that just completed
             curl_multi_remove_handle($master, $done['handle']);
         }

         //don't waste too many CPU cycles on looping
         usleep(1000);
     } while ($running > 0 || $handlesAdded);

     curl_multi_close($master);
 }