/**
 * Collect up to 10 entries from the memo-linux.com RSS feed into $this->items.
 *
 * For every feed <item> the title, guid (used as the item URI) and pubDate are taken
 * from the feed; the content is scraped from the linked page's div.entry-content with
 * <script> blocks, <div> blocks and everything from the first <h1 to the end of that
 * line removed.
 *
 * @param array $param Request parameters; not read by this method.
 */
public function collectData(array $param)
{
    // Helpers are closures, not named functions: a named function declared inside a
    // method is registered globally the first time the method runs, so a second call
    // (or any other code declaring the same name) dies with "Cannot redeclare".
    $stripCdata = function ($string) {
        return str_replace(array('<![CDATA[', ']]>'), '', $string);
    };

    $extractContent = function ($url) {
        $page = file_get_html($url);
        if (!$page) {
            // Article page could not be loaded: keep the entry, with empty content.
            return '';
        }

        $node = $page->find('div.entry-content', 0);
        if (!is_object($node)) {
            return '';
        }

        $text = $node->innertext;
        $text = preg_replace('@<script[^>]*?>.*?</script>@si', '', $text);
        $text = preg_replace('@<div[^>]*?>.*?</div>@si', '', $text);
        $text = preg_replace("/<h1.*/", '', $text);

        return $text;
    };

    $html = file_get_html('http://memo-linux.com/feed/') or $this->returnError('Could not request MemoLinux.', 404);

    $limit = 0;
    foreach ($html->find('item') as $element) {
        if ($limit >= 10) {
            // Nothing more to do once the cap is reached.
            break;
        }

        $item = new \Item();
        $item->title = $stripCdata($element->find('title', 0)->innertext);
        $item->uri = $stripCdata($element->find('guid', 0)->plaintext);
        $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
        $item->content = $extractContent($item->uri);
        $this->items[] = $item;
        $limit++;
    }
}
/**
 * Collect up to 10 entries from the LeMotDuJour FeedBurner feed into $this->items.
 *
 * For every feed <item> the title, guid (used as the item URI) and pubDate are taken
 * from the feed; the content is the inner HTML of div.single-contenu on the linked page.
 *
 * @param array $param Request parameters; not read by this method.
 */
public function collectData(array $param)
{
    // Helpers are closures, not named functions: a named function declared inside a
    // method is registered globally the first time the method runs, so a second call
    // (or any other code declaring the same name) dies with "Cannot redeclare".
    $stripCdata = function ($string) {
        return str_replace(array('<![CDATA[', ']]>'), '', $string);
    };

    $extractContent = function ($url) {
        $page = file_get_html($url);
        if (!$page) {
            // Article page could not be loaded: keep the entry, with empty content.
            return '';
        }

        $node = $page->find('div.single-contenu', 0);

        return is_object($node) ? $node->innertext : '';
    };

    $html = file_get_html('http://feeds2.feedburner.com/lemotdujour/lemotdujour') or $this->returnError('Could not request LeMotDuJour.', 404);

    $limit = 0;
    foreach ($html->find('item') as $element) {
        if ($limit >= 10) {
            // Nothing more to do once the cap is reached.
            break;
        }

        $item = new \Item();
        $item->title = $stripCdata($element->find('title', 0)->innertext);
        $item->uri = $stripCdata($element->find('guid', 0)->plaintext);
        $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
        $item->content = $extractContent($item->uri);
        $this->items[] = $item;
        $limit++;
    }
}
/**
 * Collect up to 10 entries from a ledauphine.com RSS feed into $this->items.
 *
 * Without $param['u'] the site-wide feed (/rss) is read. With it, the value is stored
 * in $this->request and inserted as a path segment before /rss.
 *
 * For every feed <item> the title, guid (used as the item URI) and pubDate are taken
 * from the feed; the content is the inner HTML of the first div.column on the linked
 * page with <script> blocks removed.
 *
 * @param array $param Request parameters; only the optional key 'u' is read.
 */
public function collectData(array $param)
{
    // A closure instead of a named function: a named function declared inside a method
    // is registered globally the first time the method runs, so a second call (or any
    // other code declaring the same name) dies with "Cannot redeclare".
    $extractContent = function ($url) {
        $page = file_get_html($url);
        if (!$page) {
            // Article page could not be loaded: keep the entry, with empty content.
            return '';
        }

        $node = $page->find('div.column', 0);
        if (!is_object($node)) {
            return '';
        }

        return preg_replace('@<script[^>]*?>.*?</script>@si', '', $node->innertext);
    };

    if (isset($param['u'])) {
        $this->request = $param['u'];
        // NOTE(review): the value is concatenated into the URL as-is (it may be meant to
        // contain slashes); confirm whether it needs validation before use.
        $feedUrl = 'http://www.ledauphine.com/' . $this->request . '/rss';
    } else {
        $feedUrl = 'http://www.ledauphine.com/rss';
    }

    $html = file_get_html($feedUrl) or $this->returnError('Could not request DauphineLibere.', 404);

    $limit = 0;
    foreach ($html->find('item') as $element) {
        if ($limit >= 10) {
            // Nothing more to do once the cap is reached.
            break;
        }

        $item = new \Item();
        $item->title = $element->find('title', 0)->innertext;
        $item->uri = $element->find('guid', 0)->plaintext;
        $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
        $item->content = $extractContent($item->uri);
        $this->items[] = $item;
        $limit++;
    }
}
/**
 * Collect up to 10 entries from the acrimed.org RSS backend into $this->items.
 *
 * For every feed <item> the title, guid (used as the item URI) and pubDate are taken
 * from the feed; the content is the inner HTML of div.texte on the linked page.
 *
 * @param array $param Request parameters; not read by this method.
 */
public function collectData(array $param)
{
    // Helpers are closures, not named functions: a named function declared inside a
    // method is registered globally the first time the method runs, so a second call
    // (or any other code declaring the same name) dies with "Cannot redeclare".
    $stripCdata = function ($string) {
        return str_replace(array('<![CDATA[', ']]>'), '', $string);
    };

    $extractContent = function ($url) {
        $page = file_get_html($url);
        if (!$page) {
            // Article page could not be loaded: keep the entry, with empty content.
            return '';
        }

        $node = $page->find('div.texte', 0);

        return is_object($node) ? $node->innertext : '';
    };

    $html = file_get_html('http://www.acrimed.org/spip.php?page=backend') or $this->returnError('Could not request Acrimed.', 404);

    $limit = 0;
    foreach ($html->find('item') as $element) {
        if ($limit >= 10) {
            // Nothing more to do once the cap is reached.
            break;
        }

        $item = new \Item();
        $item->title = $stripCdata($element->find('title', 0)->innertext);
        $item->uri = $stripCdata($element->find('guid', 0)->plaintext);
        $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
        $item->content = $extractContent($item->uri);
        $this->items[] = $item;
        $limit++;
    }
}
/**
 * Collect up to 10 entries from the tuxboard.com Atom feed into $this->items.
 *
 * For every feed <entry> the title, the href of the first <link> (used as the item URI)
 * and the <published> date are taken from the feed; the content is the inner HTML of
 * article#page on the linked page with <script> blocks removed.
 *
 * @param array $param Request parameters; not read by this method.
 */
public function collectData(array $param)
{
    // Helpers are closures, not named functions: a named function declared inside a
    // method is registered globally the first time the method runs, so a second call
    // (or any other code declaring the same name) dies with "Cannot redeclare".
    $stripCdata = function ($string) {
        return str_replace(array('<![CDATA[', ']]>'), '', $string);
    };

    $extractContent = function ($url) {
        $page = file_get_html($url);
        if (!$page) {
            // Article page could not be loaded: keep the entry, with empty content.
            return '';
        }

        $node = $page->find('article#page', 0);
        if (!is_object($node)) {
            return '';
        }

        return preg_replace('@<script[^>]*?>.*?</script>@si', '', $node->innertext);
    };

    $html = file_get_html('http://www.tuxboard.com/feed/atom/') or $this->returnError('Could not request Tuxboard.', 404);

    $limit = 0;
    foreach ($html->find('entry') as $element) {
        if ($limit >= 10) {
            // Nothing more to do once the cap is reached.
            break;
        }

        $item = new \Item();
        $item->title = $stripCdata($element->find('title', 0)->innertext);
        $item->uri = $element->find('link', 0)->href;
        $item->timestamp = strtotime($element->find('published', 0)->plaintext);
        $item->content = $extractContent($item->uri);
        $this->items[] = $item;
        $limit++;
    }
}
/**
 * Collect up to 3 entries from the nextinpact.com news RSS feed into $this->items.
 *
 * For every feed <item> the title, guid (used as the item URI), enclosure url (used as
 * thumbnail), author and pubDate are taken from the feed. The content is assembled from
 * the linked page: subtitle (span.sub_title), main image (img.dedicated inside
 * div.container_main_image_article), article body (div[itemprop=articleBody]) and, when
 * present, the h2.title_reserve_article notice.
 *
 * @param array $param Request parameters; not read by this method.
 */
public function collectData(array $param)
{
    // Helpers are closures, not named functions, for two reasons:
    //  - the page extractor calls $this->file_get_html(); $this does not exist inside a
    //    plain nested function, whereas a closure created in a method has it bound;
    //  - a named function declared inside a method is registered globally on first call,
    //    so a second call (or another declaration of the same name) cannot redeclare it.
    $stripCdata = function ($string) {
        return str_replace(array('<![CDATA[', ']]>'), '', $string);
    };

    // Inner HTML of a lookup result, or '' when the element was not found.
    $innerOf = function ($node) {
        return is_object($node) ? $node->innertext : '';
    };

    $extractContent = function ($url) use ($innerOf) {
        $page = $this->file_get_html($url);
        if (!$page) {
            // Article page could not be loaded: keep the entry, with empty content.
            return '';
        }

        // The image lookup is a two-step chain; stop early if the container is missing
        // instead of calling find() on a non-object.
        $imageSrc = '';
        $imageBox = $page->find('div.container_main_image_article', 0);
        if (is_object($imageBox)) {
            $image = $imageBox->find('img.dedicated', 0);
            if (is_object($image)) {
                $imageSrc = $image->src;
            }
        }

        $text = '<p><em>' . $innerOf($page->find('span.sub_title', 0)) . '</em></p>'
            . '<p><img src="' . $imageSrc . '" alt="-" /></p>'
            . '<div>' . $innerOf($page->find('div[itemprop=articleBody]', 0)) . '</div>';

        $premiumNotice = $page->find('h2.title_reserve_article', 0);
        if (is_object($premiumNotice)) {
            $text .= '<p><em>' . $premiumNotice->innertext . '</em></p>';
        }

        return $text;
    };

    $html = $this->file_get_html('http://www.nextinpact.com/rss/news.xml') or $this->returnError('Could not request NextInpact.', 404);

    $limit = 0;
    foreach ($html->find('item') as $element) {
        if ($limit >= 3) {
            // Nothing more to do once the cap is reached.
            break;
        }

        $item = new \Item();
        $item->title = $stripCdata($element->find('title', 0)->innertext);
        $item->uri = $stripCdata($element->find('guid', 0)->plaintext);
        $item->thumbnailUri = $stripCdata($element->find('enclosure', 0)->url);
        $item->author = $stripCdata($element->find('author', 0)->innertext);
        $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
        $item->content = $extractContent($item->uri);
        $this->items[] = $item;
        $limit++;
    }
}
/**
 * Collect up to 3 entries from the nextinpact.com news RSS feed into $this->items.
 *
 * For every feed <item> the title, guid (used as the item URI) and pubDate are taken
 * from the feed. The content is the linked page's "div#actu_entete > h2" heading wrapped
 * in <h2>, two <br> tags, then the inner HTML of div[itemprop=articleBody].
 *
 * @param array $param Request parameters; not read by this method.
 */
public function collectData(array $param)
{
    // Helpers are closures, not named functions: a named function declared inside a
    // method is registered globally the first time the method runs, so a second call
    // (or any other code declaring the same name) dies with "Cannot redeclare".
    $stripCdata = function ($string) {
        return str_replace(array('<![CDATA[', ']]>'), '', $string);
    };

    // Inner HTML of a lookup result, or '' when the element was not found.
    $innerOf = function ($node) {
        return is_object($node) ? $node->innertext : '';
    };

    $extractContent = function ($url) use ($innerOf) {
        $page = file_get_html($url);
        if (!$page) {
            // Article page could not be loaded: keep the entry, with empty content.
            return '';
        }

        $text = '<h2>' . $innerOf($page->find('div#actu_entete > h2', 0)) . '</h2><br><br>';
        $text .= $innerOf($page->find('div[itemprop=articleBody]', 0));

        return $text;
    };

    $html = file_get_html('http://www.nextinpact.com/rss/news.xml') or $this->returnError('Could not request Nextinpact.', 404);

    $limit = 0;
    foreach ($html->find('item') as $element) {
        if ($limit >= 3) {
            // Nothing more to do once the cap is reached.
            break;
        }

        $item = new \Item();
        $item->title = $stripCdata($element->find('title', 0)->innertext);
        $item->uri = $stripCdata($element->find('guid', 0)->plaintext);
        $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
        $item->content = $extractContent($item->uri);
        $this->items[] = $item;
        $limit++;
    }
}