/**
 * Report statistics to the console output.
 *
 * Advances the indicator returned by getIndicator() by one step and then
 * replaces its message with a progress line showing how many requests have
 * been sent and how many items the queue still reports.
 *
 * @param array $stats Statistics to report; the 'sent' entry is read for the message.
 */
private function report(array $stats)
{
    $progress = $this->getIndicator();
    $progress->advance();

    // Placeholder => value pairs substituted into the message template.
    $placeholders = [
        '{{sent}}' => $stats['sent'],
        '{{remaining}}' => $this->queue->count(),
    ];

    $progress->setMessage(
        strtr('Crawling... {{sent}} sent - {{remaining}} left', $placeholders)
    );
}
/**
 * Start crawling.
 *
 * Calls dispatchStart(), then keeps running batches of requests for as long
 * as the queue reports remaining items. Once the overall promise settles,
 * dispatchFinish() is called.
 *
 * @param int $chunk Value used as the 'concurrency' option of each batch.
 *
 * @return \GuzzleHttp\Promise\PromiseInterface
 */
public function start($chunk = 5)
{
    $this->dispatchStart();

    // Two nested generator loops are required here. When $chunk exceeds the
    // number of items currently queued, the request worker generator runs
    // dry before processing has had a chance to enqueue new requests, and it
    // cannot be restarted. This outer generator therefore creates a fresh
    // batch over a new worker each time the queue is found non-empty.
    $batches = function () use ($chunk) {
        while ($this->queue->count() > 0) {
            $batch = new EachPromise(
                $this->getRequestWorkerFn(),
                ['concurrency' => $chunk]
            );

            yield $batch->promise();
        }
    };

    // Registered for both fulfilment and rejection below, so dispatchFinish()
    // runs either way; whatever value it receives is handed back unchanged.
    $onSettled = function ($outcome) {
        $this->dispatchFinish();

        return $outcome;
    };

    // Concurrency of 1: batches are consumed from the generator one at a time.
    $runner = new EachPromise($batches(), ['concurrency' => 1]);

    return $runner->promise()->then($onSettled, $onSettled);
}