Esempio n. 1
0
 /**
  * Factory for a content analyzer bound to the given URL.
  *
  * The URL is first checked with Robots::robots_allowed() for the configured
  * agent name. If that check fails, or if the freshly built analyzer reports
  * no content data, the URL's status is updated through
  * Providers::change_url_status() and false is returned.
  *
  * @param string $url URL to analyze
  * @return ContentAnalyzer|false analyzer instance, or false when the URL was rejected
  */
 public static function getAnalyzer($url)
 {
     $robotsPermit = Robots::robots_allowed($url, Config::$agent_name);
     if ($robotsPermit) {
         // Robots check passed: build the analyzer and make sure it has data.
         $analyzer = new ContentAnalyzer($url);
         if ($analyzer->getCONTENT_DATA()) {
             return $analyzer;
         }
         // Nothing usable came back for this URL.
         Providers::change_url_status($url, Providers::URLS_TYPE_ERROR_NO_DATA);
         return false;
     }
     // Robots check failed: record the status and log it.
     Providers::change_url_status($url, Providers::URLS_TYPE_ROBOTS_NOT_ALLOWED);
     _w('Robots not allowed');
     return false;
 }
Esempio n. 2
0
<?php

// Cron script: fetches lead URLs, builds a ContentAnalyzer for each one and
// hands the analyzer to Providers::create_search_item().
// NOTE(review): this excerpt ends inside the try/foreach body; the closing
// braces and any catch handler are not visible here.

include 'libs/general.php';
// NOTE(review): the meaning of the second argument (10) is not visible in this
// excerpt — confirm against CronManager::init().
CronManager::init(__FILE__, 10);
try {
    // Fetch the URLs to process
    $urls = Providers::get_lead_urls();
    _w('Got ' . count($urls) . ' urls');
    // Process each URL in turn
    foreach ($urls as $url) {
        // Convert the provider record into a URL string
        // (presumably $url is an array of URL parts — verify against urls::create_url)
        $url_w = urls::create_url($url);
        _w('Getting url ' . $url_w);
        // Build the content analyzer; getAnalyzer() yields a falsy value when the URL is rejected
        $ca = ContentAnalyzer::getAnalyzer($url_w);
        // Analyzer could not be created for this URL
        if (!$ca) {
            _w('ignored');
            // Move on to the next URL
            continue;
        }
        //		_w($ca->getOgDescription());
        //		die;
        _w('Createing general CA data');
        _w('populating search table');
        // Insert the analyzed page into the search table and log the outcome
        if (Providers::create_search_item($ca)) {
            _w("search data inserted");
        } else {
            _w("unable to insert search data");
        }
        _w('setting status to indexed');
Esempio n. 3
0
 /**
  * Stores a full-text search entry for an analyzed page.
  *
  * The description comes from the page's "description" meta tag; when that
  * tag holds several values they are joined with single spaces. Without the
  * tag an empty description is stored.
  *
  * @param ContentAnalyzer $ca analyzer holding the page data
  * @return mixed whatever db::create_fulltext_item() returns
  */
 public static function create_search_item(ContentAnalyzer $ca)
 {
     $metaTags = $ca->getMetaTags();
     $description = '';
     if (isset($metaTags['description'])) {
         $rawDescription = $metaTags['description'];
         if (is_array($rawDescription)) {
             // Multiple description tags: flatten them into one string.
             $description = implode(' ', $rawDescription);
         } else {
             $description = $rawDescription;
         }
     }
     return db::create_fulltext_item(
         $ca->getUrlId(),
         $ca->getTitle(),
         $description,
         $ca->getPlainContent()
     );
 }
Esempio n. 4
0
 /**
  * Builds and stores a full-text search entry for an analyzed page.
  *
  * Fallbacks applied before storing:
  *  - title: page title, else the Open Graph title;
  *  - content: plain content, else the Open Graph description, else the
  *    "description" meta tag;
  *  - keywords: the "keywords" meta tag when it is non-empty, else keywords
  *    generated by self::create_keywords_from_content().
  *
  * The page is rejected (false is returned, nothing is stored) when it has
  * no title, when its content has fewer than five words according to
  * str_word_count(), or when its content contains multi-byte UTF-8
  * characters.
  *
  * @param ContentAnalyzer $ca analyzer holding the page data
  * @return mixed false when the page is rejected, otherwise whatever
  *               db::create_fulltext_item() returns
  */
 public static function create_search_item(ContentAnalyzer $ca)
 {
     $desc = '';
     $meta = $ca->getMetaTags();
     $title = $ca->getTitle();
     $url = $ca->getUrl();
     if ($title == '') {
         $title = $ca->getOgTitle();
     }
     $content = $ca->getPlainContent();
     if ($content == '') {
         $content = $ca->getOgDescription();
     }
     // A page without any title is not indexed.
     if ($title == '') {
         return false;
     }
     if (isset($meta['description'])) {
         $desc = is_array($meta['description']) ? implode(' ', $meta['description']) : $meta['description'];
     }
     // Generated keywords are the default; explicit meta keywords take precedence.
     $keywords = self::create_keywords_from_content($content . ' ' . $title, $url);
     if (!empty($meta['keywords'])) {
         $keywords = is_array($meta['keywords']) ? implode(' ', $meta['keywords']) : $meta['keywords'];
     }
     // Last-resort content: the meta description.
     if ($content == '' && $desc != '') {
         $content = $desc;
     }
     // Too little text to be worth indexing. This also covers empty content,
     // which counts as zero words.
     if (str_word_count($content) < 5) {
         return false;
     }
     // Byte length differs from the decoded length only when the content holds
     // multi-byte UTF-8 characters; such content is rejected.
     // NOTE(review): utf8_decode() is deprecated as of PHP 8.2; it is kept here
     // so the rejection rule stays exactly as before.
     if (strlen($content) != strlen(utf8_decode($content))) {
         return false;
     }
     return db::create_fulltext_item($title, $content, $keywords, $url, $desc);
 }