Example #1
0
 /**
  * Walk the list pages from $start to $end and download the images of every
  * archive that is not yet recorded as done.
  *
  * Successful archives are appended to $this->data['done_log'], failed ones to
  * $this->data['redo_log']; the data is saved through set_data() after each
  * archive and once more at the end. A summary line is logged per list page
  * and for the whole run.
  *
  * @param int $start first list page number; values below 1 fall back to 1
  * @param int $end   last list page number (inclusive); values below $start
  *                   mean "no upper bound" (PHP_INT_MAX)
  */
 public function run($start = 0, $end = 0)
 {
     $start = intval($start) > 0 ? intval($start) : 1;
     $end = intval($end) >= $start ? intval($end) : PHP_INT_MAX;
     $notfound_list_no = 0;
     $notfound_list_count = 0;
     $spider_archives_count = 0;
     $spider_done_archives_count = 0;
     // NOTE(review): $redo_list is filled below but never read or persisted in
     // this method - confirm whether it was meant to feed $this->data['redo_list'].
     $redo_list = array();
     $spider_start_at = date('Y-m-d H:i:s');
     $this->log('spider_start');
     for ($list_no = $start; $list_no <= $end; $list_no++) {
         $archives = $this->site->get_list_archives($this->site->list_url($list_no));
         if (empty($archives)) {
             if ($notfound_list_count > 5) {
                 // More than 5 consecutive empty pages were already counted
                 // (this is the 7th in a row): assume the listing has ended.
                 break;
             }
             $notfound_list_no = $list_no;
             $notfound_list_count++;
             continue;
         }

         // Use a numeric clock for the duration; subtracting formatted date
         // strings only subtracts their leading year and always yields 0.
         $list_started = microtime(true);
         $list_done_archives_count = 0;
         if ($notfound_list_count) {
             // An empty page followed by a non-empty one was probably a
             // transient failure; remember the last empty page number.
             $redo_list[] = $notfound_list_no;
             $notfound_list_count = 0;
         }
         foreach ($archives as $archive) {
             if (!$archive->id) {
                 continue;
             }
             if ($archive->id <= $this->data['from'] && empty($this->data['redo_list'])) {
                 // all jobs have been done before, so leave both loops
                 break 2;
             }
             if (in_array($archive->id, $this->data['done_log'])) {
                 $list_done_archives_count++;
                 continue;
             }
             $spider_archives_count++;
             if ($this->download_archive_images($archive)) {
                 $this->data['done_log'][] = $archive->id;
                 $list_done_archives_count++;
                 $spider_done_archives_count++;
             } else {
                 $this->data['redo_log'][] = $archive->id;
             }
             // Save after every archive so an interrupted run keeps its progress.
             $this->set_data();
         }

         $list_end_at = date('Y-m-d H:i:s');
         $list_archives_count = count($archives);
         $line = $list_end_at . " list page No.{$list_no} : {$list_done_archives_count}/{$list_archives_count} done " . sprintf('%.1fs', microtime(true) - $list_started);
         $this->log('list_done', $line);
         // Drop the page's archives before fetching the next page.
         unset($archives);
     }
     $spider_end_at = date('Y-m-d H:i:s');
     $line = $spider_end_at . " {$spider_done_archives_count}/{$spider_archives_count} done. start at {$spider_start_at}, end at {$spider_end_at}";
     $this->log('spider_done', $line);
     rsort($this->data['done_log']);
     rsort($this->data['redo_log']);
     $this->set_data();
 }
Example #2
0
 /**
  * Store the URL and site on the instance and resolve the archive id.
  *
  * The id is whatever the site's archive_id() returns for the given URL.
  *
  * @param mixed $url  archive URL, passed unchanged to Site::archive_id()
  * @param Site  $site site object kept on the instance and used to resolve the id
  */
 public function __construct($url, Site $site)
 {
     $this->url = $url;
     $this->site = $site;
     $this->id = $site->archive_id($url);
 }