/**
 * Crawl the list pages numbered $start..$end and download the images of
 * every archive found on them that is not already recorded as done.
 *
 * For each list page the archives are fetched through
 * $this->site->get_list_archives(). Archives whose download succeeds are
 * appended to $this->data['done_log'], the others to
 * $this->data['redo_log']; set_data() is called after every archive
 * (presumably to persist $this->data - confirm against set_data()).
 *
 * The crawl stops early when
 *  - too many consecutive list pages returned no archives, or
 *  - an archive id at or below $this->data['from'] is reached while
 *    $this->data['redo_list'] is empty.
 *
 * @param int $start first list page number; values below 1 fall back to 1
 * @param int $end   last list page number (inclusive); values below $start
 *                   mean "no upper bound" (PHP_INT_MAX)
 */
public function run($start = 0, $end = 0)
{
    $start = intval($start) > 0 ? intval($start) : 1;
    $end = intval($end) >= $start ? intval($end) : PHP_INT_MAX;

    // The check below runs before the counter is incremented, so the loop
    // gives up on the 7th consecutive empty list page.
    $max_notfound_lists = 5;

    $notfound_list_no = 0;
    $notfound_list_count = 0;
    $spider_archives_count = 0;
    $spider_done_archives_count = 0;

    // NOTE(review): $redo_list is collected but never read or stored; it was
    // probably meant to end up in $this->data['redo_list'] - confirm intent.
    $redo_list = array();

    // Make sure both logs are arrays before in_array() / rsort() touch them.
    foreach (array('done_log', 'redo_log') as $log_key) {
        if (!isset($this->data[$log_key])) {
            $this->data[$log_key] = array();
        }
    }

    $spider_start_at = date('Y-m-d H:i:s');
    $this->log('spider_start');

    for ($list_no = $start; $list_no <= $end; $list_no++) {
        // Wall-clock start of this page, used for the elapsed time in the log.
        $list_started = microtime(true);

        $archives = $this->site->get_list_archives($this->site->list_url($list_no));
        $list_done_archives_count = 0;

        if (empty($archives)) {
            if ($notfound_list_count > $max_notfound_lists) {
                // Too many empty list pages in a row: assume the end of the
                // listing has been reached.
                break;
            }
            $notfound_list_no = $list_no;
            $notfound_list_count++;
            continue;
        }

        if ($notfound_list_count) {
            // An empty page was followed by a non-empty one, so the empty
            // page was probably a transient failure; only the most recent
            // empty page number is remembered.
            $redo_list[] = $notfound_list_no;
            $notfound_list_count = 0;
        }

        foreach ($archives as $archive) {
            if (!$archive->id) {
                continue;
            }

            if ($archive->id <= $this->data['from'] && empty($this->data['redo_list'])) {
                // Everything from here on has been handled by an earlier
                // run: leave both the archive loop and the list loop.
                break 2;
            }

            if (in_array($archive->id, $this->data['done_log'])) {
                $list_done_archives_count++;
                continue;
            }

            $spider_archives_count++;
            if ($this->download_archive_images($archive)) {
                $this->data['done_log'][] = $archive->id;
                $list_done_archives_count++;
                $spider_done_archives_count++;
            } else {
                $this->data['redo_log'][] = $archive->id;
            }
            $this->set_data();
        }

        $list_end_at = date('Y-m-d H:i:s');
        $list_archives_count = count($archives);
        // Elapsed time is measured with microtime(); subtracting the two
        // formatted date strings only compared their leading year digits.
        $line = $list_end_at
            . " list page No.{$list_no} : {$list_done_archives_count}/{$list_archives_count} done "
            . sprintf('%.1fs', microtime(true) - $list_started);
        $this->log('list_done', $line);

        // Drop the page's archives before fetching the next page.
        unset($archives);
    }

    $spider_end_at = date('Y-m-d H:i:s');
    $line = $spider_end_at
        . " {$spider_done_archives_count}/{$spider_archives_count} done. \n"
        . "start at {$spider_start_at}, end at {$spider_end_at}";
    $this->log('spider_done', $line);

    rsort($this->data['done_log']);
    rsort($this->data['redo_log']);
    $this->set_data();
}