// Walk the PhantomJS result directory and process crawl output files.
// For each file whose name starts with "account_craw" (while fewer than
// $how_many_processed_once have been handled): extract the date and
// wechat_id from the file name, look up the account, read and delete the
// file, then either log the crawler error or parse the HTML for articles.
$result_dir = CACHE_DIR . DS . 'phantomjs_results';
if ($handle = opendir($result_dir)) {
    // NOTE(review): $i is compared against the batch limit below; its
    // increment is not visible in this part of the script.
    $i = 0;
    while (false !== ($entry = readdir($handle))) {
        if (preg_match('/^account_craw/', $entry) && $i < $how_many_processed_once) {
            $file = $result_dir . DS . $entry;

            // get created date from file name
            // (match against $entry, not the full path, so that a directory
            // component can never be mistaken for the date)
            $matches = array();
            preg_match('/_((\\d+)\\-\\d+\\-\\d+)\\-/', $entry, $matches);
            $date = isset($matches[1]) ? $matches[1] : null;
            $year = isset($matches[2]) ? $matches[2] : null;

            // get wechat_id from file name
            $matches = array();
            preg_match('/account_crawl_([^_]+)_/', $entry, $matches);
            $wechat_id = isset($matches[1]) ? $matches[1] : null;

            $wechat_account = WechatAccount::findByWechatId($wechat_id);
            // Loose comparison kept on purpose: it also catches a `false`
            // result from the finder.
            if ($wechat_account == null) {
                $log = new Log('wechat_account', Log::ERROR, 'Collect error: can not find wechat_account with wechat_id:' . $wechat_id);
                // Persist the entry before bailing out; without save() the
                // error was silently dropped.
                $log->save();
                unlink($file);
                // Aborts the whole run, as in the original script.
                exit;
            }

            // parse html
            load_library_simple_html_dom();
            $html = file_get_contents($file);
            // The result file is consumed exactly once, whatever it contains.
            unlink($file);

            if ($html === false) {
                // Reading failed: record it instead of handing a non-string
                // to the HTML parser (which would end in a fatal error).
                $log = new Log('wechat_account', Log::ERROR, 'Collect error: can not read result file:' . $file);
                $log->save();
            } elseif (strpos($html, '[Error]') !== FALSE) {
                // The crawler wrote an error report instead of HTML.
                $log = new Log('wechat_account', Log::ERROR, $html);
                $log->save();
            } else {
                // NOTE(review): str_get_html() can return false for empty or
                // oversized input - confirm whether that needs handling here.
                $dom = str_get_html($html);
                foreach ($dom->find('.wx-rb3') as $article) {