/**
 * Seed crawlable pages for every idle domain, or only for the requested one.
 *
 * For a domain that has a group_pattern, the domain's start page is fetched,
 * every anchor on it is normalised into a site-relative URL, and each URL that
 * Page_model::check_url() classifies as "group" and that is not stored yet is
 * inserted as a new page. For a domain without a group_pattern, a single page
 * for the domain's own URL is inserted when no such page exists.
 *
 * Progress is echoed to the output and written through log_message().
 *
 * NOTE(review): a failed fetch aborts the whole run (returns false) instead of
 * moving on to the next domain; kept as-is because callers may rely on it.
 *
 * @param mixed $domain_id Optional domain id; when null all idle domains are processed.
 * @return bool|null False when the start page of a domain could not be fetched
 *                   or parsed, null otherwise.
 */
public function domain($domain_id = null)
{
    log_message('info', 'FETCH : DOMAIN');

    $domains = $this->domain_model->list_domain("idle");
    if (empty($domains)) {
        return;
    }

    foreach ($domains as $d) {
        if ($domain_id != null && $d->id != $domain_id) {
            continue;
        }

        // Domain without a group pattern: just make sure its own page exists.
        if ($d->group_pattern == null) {
            // Keep the trailing character of root_url so the stored URL is site-relative.
            $url = substr($d->url, strlen($d->root_url) - 1);

            $pages = $this->page_model->find(array("url" => $url, "domain_id" => $d->id));
            if (count($pages) == 0) {
                $page = new Page_model();
                $page->init();
                $page->outdate = 0;
                $page->domain_id = $d->id;
                $page->parent_page_id = 0;
                $page->parse_child = 0;
                $page->parse_post = 0;
                $page->url = $url;
                $page->active_score = 0;
                $page->view = 0;
                $page->sub_comment = 0;
                $page->insert_date = mdate('%Y-%m-%d %H:%i', time());
                $id = $page->insert();
                log_message('info', 'new page domain created : ' . $id);
                unset($page);
            }
            continue;
        }

        // Domain with a group pattern: fetch its start page and harvest group links.
        $page = new Page_model();
        $page->init();
        $page->outdate = 0;
        $page->domain_id = $d->id;
        $page->parent_page_id = 0;
        $page->parse_child = 0;
        $page->parse_post = 0;
        $page->url = str_replace($d->root_url, '/', $d->url);
        $page->active_score = 0;
        $page->view = 0;
        $page->sub_comment = 0;
        $page->insert_date = mdate('%Y-%m-%d %H:%i', time());

        $fetch = $page->fetch();
        if ($fetch['content'] == null) {
            echo "NULL RESULT" . PHP_EOL;
            return false;
        }

        $html = str_get_html($fetch['content']);
        if (!$html) {
            // The parser returned nothing usable; treat it like an empty fetch
            // instead of failing on ->find() below.
            echo "NULL RESULT" . PHP_EOL;
            return false;
        }
        $links = $html->find('a');
        $html->clear();
        unset($html);

        // Loop-invariant: look the root URL up once per domain, not once per link.
        $root_url = $this->custom_model->get_value('domain', 'root_url', $d->id);

        echo PHP_EOL . 'links = ' . count($links);
        foreach ($links as $element) {
            $href = $this->_normalize_domain_href($element->href, $d->id, $root_url);
            if ($href === false) {
                // Charset conversion failed; there is no usable URL for this anchor.
                continue;
            }

            echo PHP_EOL . 'url = ' . $href;
            $res = $page->check_url($href, $page);
            if ($res != "group") {
                continue;
            }

            echo "(group)";
            $url_id = $this->is_exist($href, $d->id);
            log_message('info', ' domain ' . $d->id . ' : found group page : ' . $url_id);
            if ($url_id != 0) {
                continue;
            }

            log_message('info', ' domain : update_from_file : new ' . $res . ':' . $href);
            $p = new Page_model();
            $p->init();
            $p->outdate = 0;
            $p->domain_id = $d->id;
            $p->parent_page_id = 0;
            $p->url = $href;
            $p->parse_child = 0;
            $p->sub_comment = 0;
            // 24-hour clock (%H), consistent with every other insert_date in this method.
            $p->insert_date = mdate('%Y-%m-%d %H:%i', time());
            $p->insert();
            unset($p);
        }
        unset($page);
    }
}

/**
 * Turn a raw anchor href into the site-relative URL form used for stored pages.
 *
 * Steps: decode HTML entities, convert TIS-620 to UTF-8, apply the per-domain
 * query-string clean-up rules, cut off a "#fragment", replace the domain's
 * root URL with "/", and prefix "/" unless the result starts with "/" or ".".
 *
 * @param mixed  $raw_href Value of the anchor's href attribute.
 * @param mixed  $id       Id of the domain the anchor was found on.
 * @param string $root_url Root URL of that domain.
 * @return string|false Normalised URL, or false when the charset conversion failed.
 */
private function _normalize_domain_href($raw_href, $id, $root_url)
{
    $href = html_entity_decode($raw_href);
    $href = iconv("tis-620", "utf-8//TRANSLIT//IGNORE", $href);
    if ($href === false) {
        return false;
    }

    // Rules are keyed on the domain being processed, so they also apply when
    // the caller did not restrict the run to a single domain.

    // Keep only what precedes the first '&'.
    if (in_array($id, array(14, 36, 39, 46, 51, 53, 70, 127))) {
        $parts = explode('&', $href);
        $href = $parts[0];
    }

    // Drop the whole query string.
    if (in_array($id, array(15, 29, 30))) {
        $parts = explode('?', $href);
        $href = $parts[0];
    }

    // Keep what follows the last '/hometh', then cut at '&key'.
    if ($id == 85) {
        $parts = explode('/hometh', $href);
        $href = $parts[count($parts) - 1];
        $parts = explode('&key', $href);
        $href = $parts[0];
    }

    // Keep the path plus only the second query parameter, e.g.
    // http://www.dvdgameonline.com/forums/index.php?s=25838fd9ce52ecb94f68daaf47d8a2a0&showforum=29
    // becomes .../index.php?showforum=29
    if (in_array($id, array(95, 99, 103))) {
        $str_a = explode('?', $href);
        if (count($str_a) > 1) {
            $str_b = explode('&', $str_a[1]);
            if (count($str_b) > 1) {
                $href = $str_a[0] . '?' . $str_b[1];
            }
        }
    }

    // Truncate at '#'; a '#' in first position is intentionally left alone.
    $hash_pos = strpos($href, "#");
    if ($hash_pos > 0) {
        $href = substr($href, 0, $hash_pos);
    }

    // Make the URL relative to the domain root.
    if ($root_url != '') {
        $href = str_replace($root_url, '/', $href);
    }

    // Anything not starting with '/' or '.' gets a leading '/'.
    $first = mb_substr($href, 0, 1);
    if ($first != '/' && $first != '.') {
        $href = '/' . $href;
    }

    return $href;
}