Beispiel #1
0
 /**
  * Seed the page table from the start URL of idle domains.
  *
  * For each domain returned by list_domain("idle") (optionally restricted to
  * $domain_id):
  *  - when the domain has a group_pattern, its start URL is fetched, every
  *    <a> link is normalised to a root-relative URL, and each link that
  *    check_url() classifies as "group" is inserted as a new page unless
  *    is_exist() already reports a non-zero id for it;
  *  - otherwise the domain's own URL is inserted as a page when find()
  *    returns no page for it.
  *
  * Progress is echoed to the output and written with log_message().
  *
  * @param mixed $domain_id Id of the single domain to process; null (or any
  *                         value loosely equal to null) processes every idle domain.
  * @return bool|null false when a start page cannot be fetched or parsed
  *                   (remaining domains are then not processed); null otherwise.
  */
 public function domain($domain_id = null)
 {
     log_message('info', 'FETCH : DOMAIN');

     // Per-domain link clean-up rules, matched against the id of the domain
     // currently being crawled (loose comparison, ids may arrive as strings).
     $cut_at_ampersand = array(14, 36, 39, 46, 51, 53, 70, 127);
     $cut_at_question_mark = array(15, 29, 30);
     $keep_second_query_param = array(95, 99, 103);

     $domains = $this->domain_model->list_domain("idle");
     if (count($domains) > 0) {
         foreach ($domains as $d) {
             // When a specific domain was requested, skip all the others.
             if ($domain_id != null && $d->id != $domain_id) {
                 continue;
             }

             if ($d->group_pattern != null) {
                 // Domain with a group pattern: crawl its start page for group links.
                 $page = new Page_model();
                 $page->init();
                 $page->outdate = 0;
                 $page->domain_id = $d->id;
                 $page->parent_page_id = 0;
                 $page->parse_child = 0;
                 $page->parse_post = 0;
                 $page->url = str_replace($d->root_url, '/', $d->url);
                 $page->active_score = 0;
                 $page->view = 0;
                 $page->sub_comment = 0;
                 $page->insert_date = mdate('%Y-%m-%d %H:%i', time());

                 $fetch = $page->fetch();
                 if ($fetch['content'] == null) {
                     echo "NULL RESULT" . PHP_EOL;
                     return false;
                 }

                 $html = str_get_html($fetch['content']);
                 if (!is_object($html)) {
                     // Without this guard the find() call below would be a fatal error.
                     log_message('error', ' domain ' . $d->id . ' : start page could not be parsed');
                     return false;
                 }
                 $links = $html->find('a');
                 // NOTE(review): the DOM is released before the links are read, exactly
                 // as before; this relies on the node attributes surviving clear().
                 // Do not reorder without checking how the parser converts attribute text.
                 $html->clear();
                 unset($html);
                 echo PHP_EOL . 'links = ' . count($links);

                 // The stored root URL does not change while the links are processed,
                 // so it is looked up once instead of once per link.
                 $link_root_url = $this->custom_model->get_value('domain', 'root_url', $d->id);

                 foreach ($links as $element) {
                     $href = html_entity_decode($element->href);
                     $href = iconv("tis-620", "utf-8//TRANSLIT//IGNORE", $href);

                     // The rules below are keyed on the domain being crawled ($d->id), so
                     // they also apply when the method runs for all domains ($domain_id null).
                     if (in_array($d->id, $cut_at_ampersand)) {
                         // keep only the part before the first '&'
                         $parts = explode('&', $href);
                         $href = $parts[0];
                     }
                     if (in_array($d->id, $cut_at_question_mark)) {
                         // drop the whole query string
                         $parts = explode('?', $href);
                         $href = $parts[0];
                     }
                     if ($d->id == 85) {
                         // keep what follows the last '/hometh', then cut at '&key'
                         $parts = explode('/hometh', $href);
                         $href = $parts[count($parts) - 1];
                         $parts = explode('&key', $href);
                         $href = $parts[0];
                     }
                     if (in_array($d->id, $keep_second_query_param)) {
                         // e.g. http://www.dvdgameonline.com/forums/index.php?s=25838fd9ce52ecb94f68daaf47d8a2a0&showforum=29
                         // becomes .../index.php?showforum=29 : only the second query parameter is kept.
                         $path_and_query = explode('?', $href);
                         if (count($path_and_query) > 1) {
                             $params = explode('&', $path_and_query[1]);
                             if (count($params) > 1) {
                                 $href = $path_and_query[0] . '?' . $params[1];
                             }
                         }
                     }

                     // Truncate at '#'. A '#' at position 0 is left untouched, as before.
                     $hash_pos = strpos($href, "#");
                     if ($hash_pos > 0) {
                         $href = substr($href, 0, $hash_pos);
                     }

                     // Replace the domain root URL with '/' to get a root-relative URL.
                     if (strpos($href, $link_root_url) !== false) {
                         $href = str_replace($link_root_url, '/', $href);
                     }

                     // If href does not start with '/' or '.', prefix it with '/'.
                     $first_char = mb_substr($href, 0, 1);
                     if ($first_char != '/' && $first_char != '.') {
                         $href = '/' . $href;
                     }

                     echo PHP_EOL . 'url = ' . $href;
                     $res = $page->check_url($href, $page);
                     if ($res == "group") {
                         echo "(group)";
                         $url_id = $this->is_exist($href, $d->id);
                         log_message('info', ' domain ' . $d->id . ' : found group page : ' . $url_id);
                         if ($url_id == 0) {
                             // Group page not stored yet: insert it.
                             log_message('info', ' domain : update_from_file : new ' . $res . ':' . $href);
                             $p = new Page_model();
                             $p->init();
                             $p->outdate = 0;
                             $p->domain_id = $d->id;
                             $p->parent_page_id = 0;
                             $p->url = $href;
                             $p->parse_child = 0;
                             $p->sub_comment = 0;
                             // 24-hour clock (%H), consistent with the other insert dates in this method.
                             $p->insert_date = mdate('%Y-%m-%d %H:%i', time());
                             $p->insert();
                             unset($p);
                         }
                     }
                 }
                 unset($page);
             } else {
                 // Domain without a group pattern: register its own URL as a page.
                 // The offset keeps the last character of root_url
                 // (presumably its trailing '/' — confirm against stored data).
                 $len = strlen($d->root_url);
                 $url = substr($d->url, $len - 1);

                 // find existing pages
                 $option = array("url" => $url, "domain_id" => $d->id);
                 $pages = $this->page_model->find($option);
                 if (count($pages) == 0) {
                     $page = new Page_model();
                     $page->init();
                     $page->outdate = 0;
                     $page->domain_id = $d->id;
                     $page->parent_page_id = 0;
                     $page->parse_child = 0;
                     $page->parse_post = 0;
                     $page->url = $url;
                     $page->active_score = 0;
                     $page->view = 0;
                     $page->sub_comment = 0;
                     $page->insert_date = mdate('%Y-%m-%d %H:%i', time());
                     $id = $page->insert();
                     log_message('info', 'new page domain created : ' . $id);
                     unset($page);
                 }
             }
         }
     }
 }