/**
 * Build a content analyzer for a URL, provided robots rules permit it.
 *
 * The URL is first checked with Robots::robots_allowed() for the agent
 * named in Config::$agent_name. When the check fails, the URL's status is
 * changed to Providers::URLS_TYPE_ROBOTS_NOT_ALLOWED and the message
 * 'Robots not allowed' is passed to _w(). When the check passes but the
 * analyzer's getCONTENT_DATA() yields a falsy value, the URL's status is
 * changed to Providers::URLS_TYPE_ERROR_NO_DATA instead.
 *
 * @param string $url URL to analyze
 * @return false|ContentAnalyzer the analyzer instance, or false when the URL
 *                               was rejected by robots rules or had no data
 */
public static function getAnalyzer($url)
{
    $robotsPermit = Robots::robots_allowed($url, Config::$agent_name);

    if ($robotsPermit) {
        // Robots check passed: construct the analyzer and make sure it has content.
        $analyzer = new ContentAnalyzer($url);

        if ($analyzer->getCONTENT_DATA()) {
            return $analyzer;
        }

        // No content data available: record that on the URL and give up.
        Providers::change_url_status($url, Providers::URLS_TYPE_ERROR_NO_DATA);

        return false;
    }

    // Robots check failed: record the status first, then report it.
    Providers::change_url_status($url, Providers::URLS_TYPE_ROBOTS_NOT_ALLOWED);
    _w('Robots not allowed');

    return false;
}
// If content ignored if (!$ca) { _w('ignored'); // Skip continue; } // _w($ca->getOgDescription()); // die; _w('Createing general CA data'); _w('populating search table'); if (Providers::create_search_item($ca)) { _w("search data inserted"); } else { _w("unable to insert search data"); } _w('setting status to indexed'); if (Providers::change_url_status($url_w, Providers::URLS_TYPE_INDEXED)) { _w('status changed to INDEXED'); } else { _w('unable to change status'); } // _w($ca->getLinks()); // die; _w('inserting all other urls'); Providers::insert_url_list($ca->getLinks()); } } catch (Exception $ex) { _w('WAS ERROR !!! ' . $ex->getMessage()); } _w('Done for now'); unset($urls);