/**
 * Import queued, auto-collected articles into the main article table.
 *
 * Reads up to 30 rows through Modules_News_Models_Mysql_Articleauto::find().
 * Every row is removed from the queue as soon as it has been looked at.
 * A row whose source link is not yet known to the article DAO is handed to
 * the per-site method matching its "website" value; the result is saved only
 * when it carries a non-empty title, description and content. The seed array
 * built here has no "content" key, so only articles for which a per-site
 * method supplied one are ever stored.
 *
 * @return void
 */
public function run()
{
    ini_set("max_execution_time", 100);
    ini_set('memory_limit', '256M');
    require_once LIB_DIR . DS . 'simplehtmldom' . DS . 'simple_html_dom.php';

    $conn = Gio_Db_Connection::getConnection();

    $articleAutoDao = new Modules_News_Models_Mysql_Articleauto();
    $articleAutoDao->setConnection($conn);
    $numArticles = 30;
    $articlesAuto = $articleAutoDao->find($numArticles);

    $articleDao = new Modules_News_Models_Mysql_Article();
    $articleDao->setConnection($conn);

    if (!$articlesAuto) {
        return;
    }

    // Website identifier => method of this class that post-processes the article.
    $scrapers = array(
        'vtc'           => '_vtc',
        'vnexpress'     => '_vnexpress',
        'dantri'        => '_dantri',
        'thethaovanhoa' => '_thethaovanhoa',
        'zing'          => '_zing',
        'megafun'       => '_megafun',
        'kenh14'        => '_kenh14',
        'ngoisao'       => '_ngoisao',
        'haitugio'      => '_haitugio',
        'tuoitre'       => '_tuoitre',
        'thanhnien'     => '_thanhnien',
        'nguoilaodong'  => '_nguoilaodong',
        'haisao'        => '_haisao',
        'autopro'       => '_autopro',
        'bongda'        => '_bongda',
        'tintuconline'  => '_tintuconline',
    );

    foreach ($articlesAuto as $index => $articleAuto) {
        $categories = explode('-', $articleAuto['category_ids']);
        $article = array(
            'title'          => trim($articleAuto['title']),
            'slug'           => trim($articleAuto['slug']),
            'created_date'   => $articleAuto['created_date'],
            // The first id of the dash-separated list becomes the article's own category.
            'category_id'    => $categories[0],
            'status'         => 'active',
            'description'    => trim($articleAuto['description']),
            // Every fifth row of the batch is flagged hot.
            'article_hot'    => $index % 5 == 0 ? 1 : 0,
            // Every row whose index is not a multiple of three is flagged sticky.
            'article_sticky' => $index % 3 != 0 ? 1 : 0,
            'link_source'    => $articleAuto['link_source'],
            'image_url'      => $articleAuto['image_url'],
            'web_id'         => $articleAuto['website'],
            'article_photo'  => $articleAuto['article_photo'],
            'article_video'  => $articleAuto['article_video'],
            'language'       => 'vi_VN',
        );

        $existsArticle = $articleDao->getByLink($article['link_source']);

        // The queue row is dropped whether or not the import below succeeds.
        $articleAutoDao->delete($articleAuto['auto_id']);

        // Loose comparison kept on purpose: any empty "not found" value from
        // the DAO (null, false, empty array) counts as "does not exist yet".
        if (null != $existsArticle) {
            continue;
        }

        $website = $articleAuto['website'];
        if (isset($scrapers[$website])) {
            $method = $scrapers[$website];
            $article = $this->$method($article);
        }

        // empty() instead of a bare read: for an unknown website nothing has
        // set 'content', and reading the missing key would raise a notice.
        if (empty($article['title']) || empty($article['description']) || empty($article['content'])) {
            continue;
        }

        $articleId = $articleDao->add($article);
        if ($articleId) {
            // Link the stored article to every category of the queue row.
            $articleDao->addToCategories($articleId, $categories);
        }
    }
}
/**
 * Collect article teasers from the "thanhnien" listing pages configured in
 * the news_cron table and stage them on this object.
 *
 * Row selection:
 *  - $linkId truthy and equal to -1: every active, unlocked thanhnien row;
 *  - $linkId truthy otherwise: only the row with that cron_id;
 *  - $linkId falsy: active, unlocked rows whose last_update is older than
 *    their frequency (seconds), ordered by priority and limited to the
 *    "auto_update_num_category" config value (5 when unset).
 *
 * For each row the page at $row['source'] is downloaded and parsed. Sources
 * that cannot be downloaded, cannot be parsed, or lack the "div.newstype"
 * container are skipped without touching last_update. Otherwise last_update
 * is set to the current time after the page has been processed.
 *
 * The collected batch, if any, is appended as one element to
 * $this->addArticles, with the matching category id lists appended to
 * $this->addCategories.
 *
 * @param int|null $linkId cron_id to process, -1 for all rows, or null for due rows only
 * @return void
 */
public function thanhnien($linkId = null)
{
    ini_set("memory_limit", "256M");
    $conn = Gio_Db_Connection::getConnection();

    if ($linkId) {
        $ext = $linkId == -1 ? null : ' AND cron_id = ' . (int) $linkId;
        $sql = 'SELECT * FROM ' . $conn->_tablePrefix . 'news_cron WHERE website_name="thanhnien" AND is_locked = 0 AND status = "active"' . $ext;
    } else {
        $numCategories = isset($this->_configs['auto_update_num_category']) ? $this->_configs['auto_update_num_category'] : 5;
        $sql = 'SELECT * FROM ' . $conn->_tablePrefix . 'news_cron WHERE status = "active" AND TIMESTAMPDIFF(SECOND, last_update, "' . date('Y-m-d H:i:s') . '") > frequency AND website_name = "thanhnien" AND is_locked = 0 ORDER BY priority LIMIT ' . (int) $numCategories;
    }
    $rsSql = $conn->query($sql);

    $addArticles = array();
    $addCategories = array();
    $articleDao = new Modules_News_Models_Mysql_Article();
    $articleDao->setConnection($conn);

    while ($row = $conn->fetchAll($rsSql)) {
        // Download the listing page; an unreachable source is skipped silently.
        if (($htmlString = @file_get_contents($row['source'])) === false) {
            continue;
        }

        // str_get_html() returns false for input it refuses to parse; calling
        // find() on that would be a fatal error and abort the whole batch.
        $html = str_get_html($htmlString);
        if (!$html) {
            continue;
        }

        // Same for a page that does not contain the expected container.
        $container = $html->find('div.newstype', 0);
        if (!$container) {
            $html->clear();
            unset($html);
            continue;
        }

        $categories = explode('-', $row['category_ids']);

        // Regular listing entries, de-duplicated by slug.
        foreach ($container->find('div.cm-content') as $item) {
            $article = $this->_thanhnienListingEntry($item, $row);
            if (null === $article) {
                continue;
            }
            $existsArticle = $articleDao->getBySlug(Gio_Core_String::removeSign($article['title'], '-', true));
            if (null == $existsArticle) {
                $addArticles[] = $article;
                $addCategories[] = $categories;
            }
        }

        // The final entry of the listing uses its own CSS class.
        // NOTE(review): this entry is de-duplicated by source link while the
        // regular entries above use the slug; kept as found - confirm intent.
        $lastArticle = $container->find('div.cm-content-last', 0);
        if ($lastArticle && isset($lastArticle->innertext) && $lastArticle->innertext != null) {
            $article = $this->_thanhnienListingEntry($lastArticle, $row);
            if (null !== $article) {
                $existsArticle = $articleDao->getByLink($article['link_source']);
                if (null == $existsArticle) {
                    $addArticles[] = $article;
                    $addCategories[] = $categories;
                }
            }
        }

        // Release the parsed DOM before the next page is loaded so memory
        // does not pile up across sources.
        $html->clear();
        unset($html);

        // Record that this source has just been processed.
        $sql = 'UPDATE ' . $conn->_tablePrefix . 'news_cron SET last_update = "' . date('Y-m-d H:i:s') . '" WHERE cron_id = ' . $conn->escape($row['cron_id']);
        $rsUpdate = $conn->query($sql);
        $conn->freeResult($rsUpdate);
    }
    $conn->freeResult($rsSql);

    // Stage the collected batch and its category lists.
    if ($addArticles && $addCategories) {
        $this->addArticles[] = $addArticles;
        $this->addCategories[] = $addCategories;
    }
}

/**
 * Build a staged-article row from one listing node of a thanhnien page.
 *
 * Link and title come from the first anchor of the node, the teaser from its
 * "div.cm-des" child (tags stripped) and the thumbnail from its first image.
 *
 * @param object $node parsed HTML node of a single listing entry
 * @param array  $row  news_cron row the page belongs to
 * @return array|null the article row, or null when the node has no anchor
 */
private function _thanhnienListingEntry($node, $row)
{
    $anchor = $node->find('a', 0);
    if (!$anchor) {
        // Without an anchor there is neither a link nor a title to stage.
        return null;
    }

    $title = $anchor->plaintext;
    $image = $node->find('img', 0);
    $teaser = $node->find('div.cm-des', 0);
    $description = $teaser ? $teaser->innertext : '';

    return array(
        'title'        => trim($title),
        'slug'         => Gio_Core_String::removeSign($title, '-', true),
        'description'  => trim(strip_tags($description)),
        'created_date' => date('Y-m-d H:i:s'),
        'link_source'  => $anchor->href,
        // $row['website'] is prepended to the image path - presumably the
        // site's base URL; verify against the news_cron data.
        'image_url'    => isset($image->src) ? $row['website'] . $image->src : null,
        'category_ids' => $row['category_ids'],
        'website'      => 'thanhnien',
    );
}