/**
 * findLatestExistingAdLinks returns the 'website' value of the 1024 most
 * recently created rows of the post table (ordered by create_at, newest first).
 *
 * @return array Array of ad links
 */
public static function findLatestExistingAdLinks()
{
    $query = Post::findBySql(
        "SELECT website FROM post ORDER BY create_at DESC LIMIT 1024"
    );

    // Rows are fetched as plain arrays, so the single selected column can be
    // plucked directly instead of mapping over each row.
    $rows = $query->asArray()->all();

    return array_column($rows, 'website');
}
/**
 * isAdLinkCrawled tells whether a Post record whose 'website' column equals
 * $adlink can be found.
 *
 * Subclasses that do not store the ad link in that column can override
 * this method.
 *
 * @param string $adlink
 * @return boolean true when a matching Post was found, false otherwise
 */
public function isAdLinkCrawled($adlink)
{
    // findOne() result is compared against null: anything else counts as "crawled".
    return Post::findOne(['website' => $adlink]) !== null;
}
/**
 * crawl fetches ad data from the given model, reports how long the fetch took,
 * and hands any fetched posts to Post::batchInsert().
 *
 * Progress is written with echo: a timestamp, the time spent fetching, and
 * either the number of ads inserted or "No action taken".
 *
 * @param BaseModel $model source whose fetchAdData() supplies the posts
 */
public function crawl(BaseModel $model)
{
    // Allow this run up to 500 seconds of execution time.
    set_time_limit(500);

    $startedAt = microtime(true);

    // The timestamp is printed before fetching starts, so it marks the start of the run.
    $timestamp = date('m/d/Y h:i:s a');
    echo "[ {$timestamp} ]: ";

    $posts = $model->fetchAdData();

    $elapsed = microtime(true) - $startedAt;
    echo " [ time spent on crawling: {$elapsed} ] ";

    // Nothing fetched: report it and skip the insert.
    if (empty($posts)) {
        echo "No action taken" . PHP_EOL;
        return;
    }

    Post::batchInsert($posts);

    $inserted = count($posts);
    echo "There are [ {$inserted} ] ad inserted" . PHP_EOL;
}