Пример #1
0
 /**
  * Collect the ad links of the most recently created posts.
  *
  * Reads the `website` column of up to 1024 rows of the `post` table,
  * ordered by `create_at` with the newest rows first.
  *
  * @return array  List of `website` values, newest first
  */
 public static function findLatestExistingAdLinks()
 {
     $latestRows = Post::findBySql(
         "SELECT website FROM post ORDER BY create_at DESC LIMIT 1024"
     )->asArray()->all();

     // Each row is an associative array; keep only its 'website' field.
     return array_column($latestRows, 'website');
 }
Пример #2
0
 /**
  * Tell whether $adlink is already stored in the 'website' column of the Post table.
  *
  * Subclasses that keep their ad links somewhere else may override this method.
  *
  * @param  string  $adlink  Ad link to look up
  * @return boolean          true when a matching Post row exists, false otherwise
  */
 public function isAdLinkCrawled($adlink)
 {
     // An existence query is enough here: there is no need to load and
     // hydrate a whole Post record just to compare it against null.
     return Post::find()->where(['website' => $adlink])->exists();
 }
Пример #3
0
 /**
  * Run one crawl pass: fetch ad data through the model and store it.
  *
  * Prints a timestamp, the time spent inside $model->fetchAdData(), and
  * either the number of ads handed to Post::batchInsert() or a notice that
  * nothing was done when the fetch returned an empty result.
  *
  * @param  BaseModel $model  Source whose fetchAdData() supplies the posts
  */
 public function crawl(BaseModel $model)
 {
     // Allow this run up to 500 seconds of execution time.
     set_time_limit(500);

     $startedAt = microtime(true);
     echo '[ ' . date('m/d/Y h:i:s a') . ' ]: ';

     $fetched = $model->fetchAdData();
     // The measured interval ends right after the fetch, before any insert.
     $elapsed = microtime(true) - $startedAt;
     echo " [ time spent on crawling: " . $elapsed . ' ] ';

     // Nothing fetched: report it and stop before touching the database.
     if (empty($fetched)) {
         echo "No action taken" . PHP_EOL;
         return;
     }

     Post::batchInsert($fetched);
     echo "There are [ " . count($fetched) . " ] ad inserted" . PHP_EOL;
 }