Example #1
0
 /**
  * Build a content analyzer for a URL.
  *
  * The URL is first checked with Robots::robots_allowed() for the configured
  * agent name. If it is not allowed, or if the analyzer yields no content
  * data, the URL status is updated through Providers::change_url_status()
  * and false is returned.
  *
  * @param string $url URL to analyze
  * @return boolean|ContentAnalyzer the analyzer instance, or false on failure
  */
 public static function getAnalyzer($url)
 {
     // Honour robots rules for our agent before touching the content
     $robotsAllowed = Robots::robots_allowed($url, Config::$agent_name);
     if (!$robotsAllowed) {
         Providers::change_url_status($url, Providers::URLS_TYPE_ROBOTS_NOT_ALLOWED);
         _w('Robots not allowed');
         return false;
     }

     // Build the analyzer and hand it back only when it has content data
     $analyzer = new ContentAnalyzer($url);
     if ($analyzer->getCONTENT_DATA()) {
         return $analyzer;
     }

     // No content data: record the failure on the URL
     Providers::change_url_status($url, Providers::URLS_TYPE_ERROR_NO_DATA);
     return false;
 }
Example #2
0
        // No analyzer available for this URL ($ca is falsy): log it and
        // move on to the next iteration of the enclosing loop
        // (loop header is outside this excerpt).
        if (!$ca) {
            _w('ignored');
            // Nothing to index for this URL
            continue;
        }
        // Debug leftover (disabled):
        //		_w($ca->getOgDescription());
        //		die;
        _w('Createing general CA data');
        _w('populating search table');
        // Create the search item from the analyzer; the outcome is only
        // logged, processing continues either way.
        if (Providers::create_search_item($ca)) {
            _w("search data inserted");
        } else {
            _w("unable to insert search data");
        }
        _w('setting status to indexed');
        // Flag the URL as indexed; a failure here is logged but not fatal.
        if (Providers::change_url_status($url_w, Providers::URLS_TYPE_INDEXED)) {
            _w('status changed to INDEXED');
        } else {
            _w('unable to change status');
        }
        // Debug leftover (disabled):
        //		_w($ca->getLinks());
        //		die;
        _w('inserting all other urls');
        // Hand the links reported by the analyzer to the URL store
        // (presumably so they get crawled later - confirm against Providers).
        Providers::insert_url_list($ca->getLinks());
    }
} catch (Exception $ex) {
    // Any Exception thrown in the try body is only logged; execution
    // continues with the statements below.
    _w('WAS ERROR !!! ' . $ex->getMessage());
}
_w('Done for now');
// Drop the $urls variable now that this run is finished
unset($urls);