/**
 * Publishes one item per day built from the site's landing page.
 *
 * Downloads the page at getURI(), uses its first <img> as thumbnail and the markup found
 * between '<body role="document">' and '<br /><br />' as content, then appends a single
 * item to $this->items.
 *
 * @param array $param Request parameters; not read by this method.
 */
public function collectData(array $param)
 {
     //Returns the text located between $start and $end, or false when $start is not found
     function ExtractFromDelimiters($string, $start, $end)
     {
         if (strpos($string, $start) !== false) {
             $section_retrieved = substr($string, strpos($string, $start) + strlen($start));
             $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end));
             return $section_retrieved;
         }
         return false;
     }
     $html = $this->file_get_html($this->getURI()) or $this->returnError('Could not request EstCeQuonMetEnProd: ' . $this->getURI(), 500);
     $img = $html->find('img', 0);
     if (is_object($img)) {
         $img = $img->src;
         //Prefix test done with strpos() rather than $img[0], so that an empty or missing
         //src attribute does not raise an "uninitialized string offset" notice
         if (is_string($img) && strpos($img, '/') === 0) {
             //Root-relative path: prepend the site URI minus its last character (presumably the trailing slash)
             $img = substr($this->getURI(), 0, strlen($this->getURI()) - 1) . $img;
         }
     }
     $item = new \Item();
     //The date fragment makes the item URI change every day
     $item->uri = $this->getURI() . '#' . date('Y-m-d');
     $item->thumbnailUri = $img;
     $item->title = $this->getName();
     $item->author = 'Nicolas Hoffmann';
     $item->timestamp = strtotime('today midnight');
     //Rewrite root-relative image sources found in the content into absolute ones
     $item->content = str_replace('src="/', 'src="' . $this->getURI(), trim(ExtractFromDelimiters($html->outertext, '<body role="document">', '<br /><br />')));
     $this->items[] = $item;
 }
Example #2
0
 /**
  * Collects up to 10 articles from a Nextgov RSS category feed, fetching each article page
  * for its full text.
  *
  * @param array $param Request parameters; 'category' selects the feed and falls back to
  *                     'all' when empty or missing. It may only contain lowercase letters
  *                     and dashes and must not exceed 32 characters.
  */
 public function collectData(array $param)
 {
     //Returns the text located between $start and $end, or false when $start is not found
     function ExtractFromDelimiters($string, $start, $end)
     {
         if (strpos($string, $start) !== false) {
             $section_retrieved = substr($string, strpos($string, $start) + strlen($start));
             $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end));
             return $section_retrieved;
         }
         return false;
     }
     //Removes every section starting at $start and ending with $end (delimiters included)
     function StripWithDelimiters($string, $start, $end)
     {
         while (strpos($string, $start) !== false) {
             $section_to_remove = substr($string, strpos($string, $start));
             $section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end));
             $string = str_replace($section_to_remove, '', $string);
         }
         return $string;
     }
     //empty() also covers a missing 'category' key without raising an undefined index notice
     $category = empty($param['category']) ? 'all' : $param['category'];
     //The length check must apply to strlen()'s result: strlen($category > 32) measured a boolean
     //and therefore never rejected anything
     if (!is_string($category) || $category !== preg_replace('/[^a-z-]+/', '', $category) || strlen($category) > 32) {
         $this->returnError('Invalid "category" parameter.', 400);
     }
     $url = $this->getURI() . 'rss/' . $category . '/';
     $html = $this->file_get_html($url) or $this->returnError('Could not request Nextgov: ' . $url, 500);
     $limit = 0;
     foreach ($html->find('item') as $element) {
         //Nothing is done for entries past the tenth one, so stop iterating
         if ($limit >= 10) {
             break;
         }
         //Metadata comes from the RSS entry itself
         $article_url = ExtractFromDelimiters($element->innertext, '<link>', '</link>');
         $article_author = ExtractFromDelimiters($element->innertext, 'dc/elements/1.1/">', '</dc:creator>');
         $article_title = $element->find('title', 0)->plaintext;
         $article_subtitle = $element->find('description', 0)->plaintext;
         $article_timestamp = strtotime($element->find('pubDate', 0)->plaintext);
         $article_thumbnail = ExtractFromDelimiters($element->innertext, '<media:content url="', '"');
         //Full text comes from the article page
         $article = $this->file_get_html($article_url) or $this->returnError('Could not request Nextgov: ' . $article_url, 500);
         $contents = $article->find('div.wysiwyg', 0)->innertext;
         $contents = StripWithDelimiters($contents, '<div class="ad-container">', '</div>');
         //ad outer div
         $contents = StripWithDelimiters($contents, '<div', '</div>');
         $contents = StripWithDelimiters($contents, '<script', '</script>');
         //Lead image (when the feed provides one) and subtitle are prepended to the article body
         $contents = ($article_thumbnail == '' ? '' : '<p><img src="' . $article_thumbnail . '" /></p>') . '<p><b>' . $article_subtitle . '</b></p>' . trim($contents);
         if ($article_thumbnail == '') {
             //Logo fallback when the entry has no media:content image
             $article_thumbnail = 'http://cdn.nextgov.com/nextgov/images/logo.png';
         }
         $item = new \Item();
         $item->uri = $article_url;
         $item->title = $article_title;
         $item->author = $article_author;
         $item->thumbnailUri = $article_thumbnail;
         $item->timestamp = $article_timestamp;
         $item->content = $contents;
         $this->items[] = $item;
         $limit++;
     }
 }
Example #3
0
 /**
  * Collects up to 5 torrents from a t411 search result page, fetching each torrent page
  * for its description, author and thumbnail.
  *
  * @param array $param Request parameters; 'search' is mandatory and is appended as-is
  *                     to the search URL.
  */
 public function collectData(array $param)
 {
     //Utility function for retrieving text based on start and end delimiters
     function ExtractFromDelimiters($string, $start, $end)
     {
         if (strpos($string, $start) !== false) {
             $section_retrieved = substr($string, strpos($string, $start) + strlen($start));
             $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end));
             return $section_retrieved;
         }
         return false;
     }
     //Ensure proper parameters have been provided
     if (empty($param['search'])) {
         $this->returnError('You must specify a search criteria', 400);
     }
     //Retrieve torrent listing, which does not contain torrent description
     $url = 'http://www.t411.in/torrents/search/?' . $param['search'] . '&order=added&type=desc';
     $html = file_get_html($url) or $this->returnError('Could not request t411: ' . $url, 500);
     //find() with an index yields null instead of an undefined offset notice when nothing matches
     $results = $html->find('table.results', 0) or $this->returnError('No results from t411: ' . $url, 500);
     $limit = 0;
     //Process each item individually
     foreach ($results->find('tr') as $element) {
         //Limit total amount of requests
         if ($limit >= 5) {
             break;
         }
         //Rows without a torrent link (presumably table headers) cannot produce an item:
         //skip them before waiting and before issuing a request that can only fail
         $item_path = ExtractFromDelimiters($element->outertext, '<a href="//', '"');
         if ($item_path === false || $item_path === '') {
             continue;
         }
         //Requests are rate-limited, so we need to wait
         sleep(1);
         //Retrieve data from the listing row
         $item_uri = 'http://' . $item_path;
         $item_title = ExtractFromDelimiters($element->outertext, '" title="', '"');
         $date_node = $element->find('dd', 0);
         $item_date = is_object($date_node) ? strtotime($date_node->plaintext) : false;
         //Retrieve full description from torrent page
         if ($item_html = file_get_html($item_uri)) {
             //Retrieve data from page contents
             $item_desc = $item_html->find('div.description', 0);
             if (!is_object($item_desc)) {
                 //No description block on the fetched page: nothing to publish for this row
                 continue;
             }
             $author_node = $item_html->find('a.profile', 0);
             $item_author = is_object($author_node) ? $author_node->innertext : null;
             //Retrieve image for thumbnail or generic logo fallback
             $item_image = 'http://www.t411.in/themes/blue/images/logo.png';
             foreach ($item_desc->find('img') as $img) {
                 //First image whose source does not mention 'dreamprez' wins
                 if (strpos($img->src, 'dreamprez') === false) {
                     $item_image = $img->src;
                     break;
                 }
             }
             //Build and add final item
             $item = new \Item();
             $item->uri = $item_uri;
             $item->title = $item_title;
             $item->author = $item_author;
             $item->timestamp = $item_date;
             $item->thumbnailUri = $item_image;
             $item->content = utf8_encode($item_desc->innertext);
             $this->items[] = $item;
             $limit++;
         }
     }
 }
 /**
  * Collects up to 5 articles from the site's 'feed/' RSS, fetching each article page for
  * its full text and author.
  *
  * @param array $param Request parameters; not read by this method.
  */
 public function collectData(array $param)
 {
     //Returns the text located between $start and $end, or false when $start is not found
     function ExtractFromDelimiters($string, $start, $end)
     {
         if (strpos($string, $start) !== false) {
             $section_retrieved = substr($string, strpos($string, $start) + strlen($start));
             $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end));
             return $section_retrieved;
         }
         return false;
     }
     //Removes every section starting at $start and ending with $end (delimiters included)
     function StripWithDelimiters($string, $start, $end)
     {
         while (strpos($string, $start) !== false) {
             $section_to_remove = substr($string, strpos($string, $start));
             $section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end));
             $string = str_replace($section_to_remove, '', $string);
         }
         return $string;
     }
     $feed = $this->getURI() . 'feed/';
     $html = $this->file_get_html($feed) or $this->returnError('Could not request ' . $this->getName() . ': ' . $feed, 500);
     $limit = 0;
     foreach ($html->find('item') as $element) {
         //Nothing is done for entries past the fifth one, so stop iterating
         if ($limit >= 5) {
             break;
         }
         $article_image = $element->find('image', 0)->plaintext;
         $article_url = ExtractFromDelimiters($element->innertext, '<link>', '</link>');
         $article_summary = ExtractFromDelimiters($element->innertext, '<description><![CDATA[<p>', '</p>');
         $article_html = file_get_contents($article_url) or $this->returnError('Could not request ' . $this->getName() . ': ' . $article_url, 500);
         //Response is GZipped even if we didn't accept GZip!? Let's decompress...
         //The GZip magic number (ID1 = 0x1f, ID2 = 0x8b, see
         //http://www.gzip.org/zlib/rfc-gzip.html#header-trailer) is written with escape
         //sequences so that the test does not depend on the encoding of this source file
         if (substr($article_html, 0, 2) === "\x1f\x8b") {
             $article_html = gzdecode($article_html);
         }
         //Now we have our HTML data. But still, that's an important HTTP violation...
         $article_html = str_get_html($article_html);
         $article_content = $article_html->find('div.wlistingsingletext', 0)->innertext;
         $article_content = StripWithDelimiters($article_content, '<script', '</script>');
         //Lead image and summary are prepended to the article body
         $article_content = '<p><img src="' . $article_image . '" /></p>' . '<p><b>' . $article_summary . '</b></p>' . trim($article_content);
         $item = new \Item();
         $item->uri = $article_url;
         $item->thumbnailUri = $article_image;
         $item->title = $element->find('title', 0)->plaintext;
         $item->author = $article_html->find('a[rel=author]', 0)->plaintext;
         $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
         $item->content = $article_content;
         $this->items[] = $item;
         $limit++;
     }
 }
Example #5
0
 /**
  * Collects up to 8 CNET news articles from the home page or from a topic page.
  *
  * Each 'div.assetBody' teaser whose link contains '/news/' is fetched individually so that
  * the full article body can be published.
  *
  * @param array $param Request parameters; a non-empty 'topic' is stored in $this->topicName
  *                     and selects the 'topics/<topic>/' listing.
  */
 public function collectData(array $param)
 {
     //Text found between $start and $end, or false when $start does not occur
     function ExtractFromDelimiters($string, $start, $end)
     {
         $start_pos = strpos($string, $start);
         if ($start_pos === false) {
             return false;
         }
         $tail = substr($string, $start_pos + strlen($start));
         return substr($tail, 0, strpos($tail, $end));
     }
     //Drops every $start...$end section (delimiters included) from $string
     function StripWithDelimiters($string, $start, $end)
     {
         for ($pos = strpos($string, $start); $pos !== false; $pos = strpos($string, $start)) {
             $tail = substr($string, $pos);
             $chunk = substr($tail, 0, strpos($tail, $end) + strlen($end));
             $string = str_replace($chunk, '', $string);
         }
         return $string;
     }
     //Keeps the article from its first paragraph on and removes credits, scripts,
     //related links and enlarge anchors
     function CleanArticle($article_html)
     {
         $body_offset = strpos($article_html, '<p>') + 3;
         $cleaned = '<p>' . substr($article_html, $body_offset);
         $unwanted_sections = array(
             array('<span class="credit">', '</span>'),
             array('<script', '</script>'),
             array('<div class="shortcode related-links', '</div>'),
             array('<a class="clickToEnlarge">', '</a>'),
         );
         foreach ($unwanted_sections as $delimiters) {
             $cleaned = StripWithDelimiters($cleaned, $delimiters[0], $delimiters[1]);
         }
         return $cleaned;
     }
     if (!empty($param['topic'])) {
         $this->topicName = $param['topic'];
     }
     //Home page by default, topic listing when a topic is known
     $pageUrl = 'http://www.cnet.com/';
     if (!empty($this->topicName)) {
         $pageUrl .= 'topics/' . $this->topicName . '/';
     }
     $html = $this->file_get_html($pageUrl) or $this->returnError('Could not request CNET: ' . $pageUrl, 500);
     $limit = 0;
     foreach ($html->find('div.assetBody') as $teaser) {
         if ($limit >= 8) {
             continue;
         }
         //Data available from the listing
         $title = trim($teaser->find('h2', 0)->plaintext);
         $uri = 'http://www.cnet.com' . $teaser->find('a', 0)->href;
         $thumbnail = $teaser->parent()->find('img', 0)->src;
         $timestamp = strtotime($teaser->find('time.assetTime', 0)->plaintext);
         $author = trim($teaser->find('a[rel=author]', 0)->plaintext);
         //Only titled teasers pointing to a news article are kept
         if (empty($title) || empty($uri) || strpos($uri, '/news/') === false) {
             continue;
         }
         $page = $this->file_get_html($uri) or $this->returnError('Could not request CNET: ' . $uri, 500);
         //Without a listing thumbnail, look for an image container in the article page
         if (is_null($thumbnail)) {
             $thumbnail = $page->find('div.originalImage', 0);
         }
         if (is_null($thumbnail)) {
             $thumbnail = $page->find('span.imageContainer', 0);
         }
         if (is_object($thumbnail)) {
             $thumbnail = $thumbnail->find('img', 0)->src;
         }
         $body = ExtractFromDelimiters($page, '<div class="articleContent', '<footer>');
         $item = new \Item();
         $item->uri = $uri;
         $item->thumbnailUri = $thumbnail;
         $item->title = $title;
         $item->author = $author;
         $item->timestamp = $timestamp;
         $item->content = trim(CleanArticle($body));
         $this->items[] = $item;
         $limit++;
     }
 }
Example #6
0
 /**
  * Collects up to 4 CNET news articles from the home page or from a topic page.
  *
  * Articles are discovered through the JSON stored in the 'data-social-counts-options'
  * attribute of each 'div.socialSharingSmall' element; only URLs containing '/news/' are
  * fetched and published.
  *
  * @param array $param Request parameters; a non-empty 'topic' is stored in $this->topicName
  *                     and selects the 'topics/<topic>/' listing.
  */
 public function collectData(array $param)
 {
     //Text found between $start and $end, or false when $start does not occur
     function ExtractFromDelimiters($string, $start, $end)
     {
         $start_pos = strpos($string, $start);
         if ($start_pos === false) {
             return false;
         }
         $tail = substr($string, $start_pos + strlen($start));
         return substr($tail, 0, strpos($tail, $end));
     }
     //Drops every $start...$end section (delimiters included) from $string
     function StripWithDelimiters($string, $start, $end)
     {
         for ($pos = strpos($string, $start); $pos !== false; $pos = strpos($string, $start)) {
             $tail = substr($string, $pos);
             $chunk = substr($tail, 0, strpos($tail, $end) + strlen($end));
             $string = str_replace($chunk, '', $string);
         }
         return $string;
     }
     //Keeps the article from the paragraph following '</script></div>' on and removes
     //scripts, related links and enlarge anchors
     function CleanArticle($article_html)
     {
         $body_offset = strpos($article_html, '</script></div><p>') + 18;
         $cleaned = '<p>' . substr($article_html, $body_offset);
         $unwanted_sections = array(
             array('<script>', '</script>'),
             array('<div class="shortcode related-links', '</div>'),
             array('<a class="clickToEnlarge">', '</a>'),
         );
         foreach ($unwanted_sections as $delimiters) {
             $cleaned = StripWithDelimiters($cleaned, $delimiters[0], $delimiters[1]);
         }
         return $cleaned;
     }
     if (!empty($param['topic'])) {
         $this->topicName = $param['topic'];
     }
     //Home page by default, topic listing when a topic is known
     $pageUrl = 'http://www.cnet.com/';
     if (!empty($this->topicName)) {
         $pageUrl .= 'topics/' . $this->topicName . '/';
     }
     $html = file_get_html($pageUrl) or $this->returnError('Could not request CNET: ' . $pageUrl, 500);
     $limit = 0;
     foreach ($html->find('div.socialSharingSmall') as $share_box) {
         if ($limit >= 4) {
             continue;
         }
         //Title and URL are read from the JSON held by the sharing widget
         $meta_json = ExtractFromDelimiters($share_box->outertext, 'data-social-counts-options=\'', '\'>');
         $meta = json_decode($meta_json);
         $title = $meta->title;
         $uri = $meta->url;
         //Only titled entries pointing to a news article are kept
         if (empty($title) || empty($uri) || strpos($uri, '/news/') === false) {
             continue;
         }
         $page = file_get_html($uri) or $this->returnError('Could not request CNET: ' . $uri, 500);
         $published = ExtractFromDelimiters($page->innertext, '<time itemprop="datePublished" class="', '">');
         $timestamp = strtotime($published);
         //Thumbnail: first image of the original image block, else of the image container
         $thumbnail = $page->find('div.originalImage', 0);
         if (is_null($thumbnail)) {
             $thumbnail = $page->find('span.imageContainer', 0);
         }
         if (!is_null($thumbnail)) {
             $thumbnail = $thumbnail->find('img', 0)->src;
         }
         $body = ExtractFromDelimiters($page, '</div></div></div><div class="col-8">', '<footer>');
         $content = trim(CleanArticle($body));
         $author = trim($page->find('a.author', 0)->plaintext);
         $item = new \Item();
         $item->uri = $uri;
         $item->thumbnailUri = $thumbnail;
         $item->title = $title;
         $item->author = $author;
         $item->timestamp = $timestamp;
         $item->content = $content;
         $this->items[] = $item;
         $limit++;
     }
 }
Example #7
0
 /**
  * Collects the posts of a Facebook page or profile, relaying Facebook's captcha challenge
  * to the viewer when one is presented.
  *
  * Flow: (1) when a 'captcha_response' was POSTed, replay the captcha form saved in the
  * session and use Facebook's answer as page contents; (2) otherwise request the page for
  * $param['u']; (3) when the page holds a captcha form, save it in the session, display a
  * form to the viewer and stop; (4) otherwise turn every timeline post into an item.
  *
  * @param array $param Request parameters; 'u' is the Facebook username, or a path
  *                     containing '/' which is requested below 'pages/'.
  */
 public function collectData(array $param)
 {
     //Extract a string using start and end delimiters
     function ExtractFromDelimiters($string, $start, $end)
     {
         if (strpos($string, $start) !== false) {
             $section_retrieved = substr($string, strpos($string, $start) + strlen($start));
             $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end));
             return $section_retrieved;
         }
         return false;
     }
     //Utility function for cleaning a Facebook link
     $unescape_fb_link = function ($matches) {
         if (is_array($matches) && count($matches) > 1) {
             $link = $matches[1];
             if (strpos($link, '/') === 0) {
                 //Relative link: make it absolute. No quote is appended here, the closing
                 //quote is added once by the return statement below
                 $link = 'https://www.facebook.com' . $link;
             }
             if (strpos($link, 'facebook.com/l.php?u=') !== false) {
                 //External link redirection: keep only the decoded target
                 $link = urldecode(ExtractFromDelimiters($link, 'facebook.com/l.php?u=', '&'));
             }
             return ' href="' . $link . '"';
         }
     };
     //Utility function for converting facebook emoticons
     $unescape_fb_emote = function ($matches) {
         static $facebook_emoticons = array('smile' => ':)', 'frown' => ':(', 'tongue' => ':P', 'grin' => ':D', 'gasp' => ':O', 'wink' => ';)', 'pacman' => ':<', 'grumpy' => '>_<', 'unsure' => ':/', 'cry' => ':\'(', 'kiki' => '^_^', 'glasses' => '8-)', 'sunglasses' => 'B-)', 'heart' => '<3', 'devil' => ']:D', 'angel' => '0:)', 'squint' => '-_-', 'confused' => 'o_O', 'upset' => 'xD', 'colonthree' => ':3', 'like' => '&#x1F44D;');
         $len = count($matches);
         //First captured group naming a known emoticon wins
         for ($i = 1; $i < $len; $i++) {
             if (isset($facebook_emoticons[$matches[$i]])) {
                 return $facebook_emoticons[$matches[$i]];
             }
         }
         //Unknown emoticon: leave the matched text untouched
         return $matches[0];
     };
     $html = null;
     //Handle captcha response sent by the viewer
     if (isset($_POST['captcha_response'])) {
         if (session_status() == PHP_SESSION_NONE) {
             session_start();
         }
         if (isset($_SESSION['captcha_fields'], $_SESSION['captcha_action'])) {
             $captcha_action = $_SESSION['captcha_action'];
             $captcha_fields = $_SESSION['captcha_fields'];
             //Only alphanumeric characters of the viewer's answer are forwarded
             $captcha_fields['captcha_response'] = preg_replace("/[^a-zA-Z0-9]+/", "", $_POST['captcha_response']);
             $http_options = array('http' => array('method' => 'POST', 'user_agent' => ini_get('user_agent'), 'header' => array("Content-type: application/x-www-form-urlencoded\r\nReferer: {$captcha_action}\r\nCookie: noscript=1\r\n"), 'content' => http_build_query($captcha_fields)));
             $context = stream_context_create($http_options);
             $html = file_get_contents($captcha_action, false, $context);
             if ($html === FALSE) {
                 $this->returnError('Failed to submit captcha response back to Facebook', 500);
             }
             $html = str_get_html($html);
         }
         //The saved form is dropped whether or not it was submitted
         unset($_SESSION['captcha_fields']);
         unset($_SESSION['captcha_action']);
     }
     //Retrieve page contents
     if (is_null($html)) {
         if (isset($param['u'])) {
             if (!strpos($param['u'], "/")) {
                 $html = $this->file_get_html('https://www.facebook.com/' . urlencode($param['u']) . '?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
             } else {
                 $html = $this->file_get_html('https://www.facebook.com/pages/' . $param['u'] . '?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
             }
         } else {
             $this->returnError('You must specify a Facebook username.', 400);
         }
     }
     //Handle captcha form?
     $captcha = $html->find('div.captcha_interstitial', 0);
     if (!is_null($captcha)) {
         //Save form for submitting after getting captcha response
         if (session_status() == PHP_SESSION_NONE) {
             session_start();
         }
         $captcha_fields = array();
         foreach ($captcha->find('input, button') as $input) {
             $captcha_fields[$input->name] = $input->value;
         }
         $_SESSION['captcha_fields'] = $captcha_fields;
         $_SESSION['captcha_action'] = 'https://www.facebook.com' . $captcha->find('form', 0)->action;
         //Show captcha filling form to the viewer, proxying the captcha image
         $img = base64_encode(file_get_contents($captcha->find('img', 0)->src));
         header('HTTP/1.1 500 ' . Http::getMessageForCode(500));
         header('Content-Type: text/html');
         //The query string comes from the client: escape it before echoing it into an HTML attribute
         die('<form method="post" action="?' . htmlspecialchars($_SERVER['QUERY_STRING'], ENT_QUOTES) . '">' . '<h2>Facebook captcha challenge</h2>' . '<p>Unfortunately, rss-bridge cannot fetch the requested page.<br />' . 'Facebook wants rss-bridge to resolve the following captcha:</p>' . '<p><img src="data:image/png;base64,' . $img . '" /></p>' . '<p><b>Response:</b> <input name="captcha_response" placeholder="please fill in" />' . '<input type="submit" value="Submit!" /></p>' . '</form>');
     }
     //No captcha? We can carry on retrieving page contents :)
     $element = $html->find('#pagelet_timeline_main_column')[0]->children(0)->children(0)->children(0)->next_sibling()->children(0);
     if (isset($element)) {
         $author = str_replace(' | Facebook', '', $html->find('title#pageTitle', 0)->innertext);
         $profilePic = 'https://graph.facebook.com/' . $param['u'] . '/picture?width=200&amp;height=200';
         $this->name = $author;
         foreach ($element->children() as $post) {
             $item = new \Item();
             //Only children holding an <abbr> element are treated as posts
             if (count($post->find('abbr')) > 0) {
                 //Retrieve post contents
                 $content = preg_replace('/(?i)><div class=\\"clearfix([^>]+)>(.+?)div\\ class=\\"userContent\\"/i', '', $post);
                 $content = preg_replace('/(?i)><div class=\\"_59tj([^>]+)>(.+?)<\\/div><\\/div><a/i', '', $content);
                 $content = preg_replace('/(?i)><div class=\\"_3dp([^>]+)>(.+?)div\\ class=\\"[^u]+userContent\\"/i', '', $content);
                 $content = preg_replace('/(?i)><div class=\\"_4l5([^>]+)>(.+?)<\\/div>/i', '', $content);
                 //Remove html nodes, keep only img, links, basic formatting
                 $content = strip_tags($content, '<a><img><i><u>');
                 //Adapt link hrefs: convert relative links into absolute links and bypass external link redirection
                 $content = preg_replace_callback('/ href=\\"([^"]+)\\"/i', $unescape_fb_link, $content);
                 //Clean useless html tag properties and fix link closing tags
                 foreach (array('onmouseover', 'onclick', 'target', 'ajaxify', 'tabindex', 'class', 'style', 'data-[^=]*', 'aria-[^=]*', 'role', 'rel', 'id') as $property_name) {
                     $content = preg_replace('/ ' . $property_name . '=\\"[^"]*\\"/i', '', $content);
                 }
                 $content = preg_replace('/<\\/a [^>]+>/i', '</a>', $content);
                 //Convert textual representation of emoticons eg "<i><u>smile emoticon</u></i>" back to ASCII emoticons eg ":)"
                 $content = preg_replace_callback('/<i><u>([^ <>]+) ([^<>]+)<\\/u><\\/i>/i', $unescape_fb_emote, $content);
                 //Retrieve date of the post
                 $date = $post->find("abbr")[0];
                 if (isset($date) && $date->hasAttribute('data-utime')) {
                     $date = $date->getAttribute('data-utime');
                 } else {
                     $date = 0;
                 }
                 //Build title from username and content
                 //NOTE(review): when wordwrap() inserts no line break (text without spaces),
                 //strpos() returns false and the truncated text is reduced to '...' - confirm this is acceptable
                 $title = $author;
                 if (strlen($title) > 24) {
                     $title = substr($title, 0, strpos(wordwrap($title, 24), "\n")) . '...';
                 }
                 $title = $title . ' | ' . strip_tags($content);
                 if (strlen($title) > 64) {
                     $title = substr($title, 0, strpos(wordwrap($title, 64), "\n")) . '...';
                 }
                 //Use the post's second <img> (index 1) as thumbnail if available, or profile pic fallback
                 $thumbnail = $post->find('img', 1);
                 if (is_object($thumbnail)) {
                     $thumbnail = $thumbnail->src;
                 } else {
                     $thumbnail = $profilePic;
                 }
                 //Build and add final item
                 $item->uri = 'https://facebook.com' . $post->find('abbr')[0]->parent()->getAttribute('href');
                 $item->thumbnailUri = $thumbnail;
                 $item->content = $content;
                 $item->title = $title;
                 $item->author = $author;
                 $item->timestamp = $date;
                 $this->items[] = $item;
             }
         }
     }
 }
Example #8
0
 /**
  * Collects the posts of a Facebook page or profile.
  *
  * Requests the no-script version of the page for $param['u'] and turns every child of the
  * posts section carrying a 'data-time' attribute into an item.
  *
  * @param array $param Request parameters; 'u' is the Facebook username, or a path
  *                     containing '/' which is requested below 'pages/'.
  */
 public function collectData(array $param)
 {
     //Extract a string using start and end delimiters
     function ExtractFromDelimiters($string, $start, $end)
     {
         if (strpos($string, $start) !== false) {
             $section_retrieved = substr($string, strpos($string, $start) + strlen($start));
             $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end));
             return $section_retrieved;
         }
         return false;
     }
     //Utility function for cleaning a Facebook link
     $unescape_fb_link = function ($matches) {
         if (is_array($matches) && count($matches) > 1) {
             $link = $matches[1];
             if (strpos($link, '/') === 0) {
                 //Relative link: make it absolute. No quote is appended here, the closing
                 //quote is added once by the return statement below
                 $link = 'https://facebook.com' . $link;
             }
             if (strpos($link, 'facebook.com/l.php?u=') !== false) {
                 //External link redirection: keep only the decoded target
                 $link = urldecode(ExtractFromDelimiters($link, 'facebook.com/l.php?u=', '&'));
             }
             return ' href="' . $link . '"';
         }
     };
     //Utility function for converting facebook emoticons
     $unescape_fb_emote = function ($matches) {
         static $facebook_emoticons = array('smile' => ':)', 'frown' => ':(', 'tongue' => ':P', 'grin' => ':D', 'gasp' => ':O', 'wink' => ';)', 'pacman' => ':<', 'grumpy' => '>_<', 'unsure' => ':/', 'cry' => ':\'(', 'kiki' => '^_^', 'glasses' => '8-)', 'sunglasses' => 'B-)', 'heart' => '<3', 'devil' => ']:D', 'angel' => '0:)', 'squint' => '-_-', 'confused' => 'o_O', 'upset' => 'xD', 'colonthree' => ':3', 'like' => '&#x1F44D;');
         $len = count($matches);
         //First captured group naming a known emoticon wins
         for ($i = 1; $i < $len; $i++) {
             if (isset($facebook_emoticons[$matches[$i]])) {
                 return $facebook_emoticons[$matches[$i]];
             }
         }
         //Unknown emoticon: leave the matched text untouched
         return $matches[0];
     };
     $html = '';
     if (isset($param['u'])) {
         if (!strpos($param['u'], "/")) {
             $html = file_get_html('https://facebook.com/' . urlencode($param['u']) . '?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
         } else {
             $html = file_get_html('https://facebook.com/pages/' . $param['u'] . '?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
         }
     } else {
         $this->returnError('You must specify a Facebook username.', 400);
     }
     $element = $html->find('[id^=PagePostsSectionPagelet-]')[0]->children(0)->children(0);
     if (isset($element)) {
         $author = str_replace(' | Facebook', '', $html->find('title#pageTitle', 0)->innertext);
         $profilePic = 'https://graph.facebook.com/' . $param['u'] . '/picture?width=200&amp;height=200';
         $this->name = $author;
         foreach ($element->children() as $post) {
             $item = new \Item();
             //Only children carrying a data-time attribute are treated as posts
             if ($post->hasAttribute("data-time")) {
                 //Retrieve post contents
                 $content = preg_replace('/(?i)><div class=\\"clearfix([^>]+)>(.+?)div\\ class=\\"userContent\\"/i', '', $post);
                 $content = preg_replace('/(?i)><div class=\\"_59tj([^>]+)>(.+?)<\\/div><\\/div><a/i', '', $content);
                 $content = preg_replace('/(?i)><div class=\\"_3dp([^>]+)>(.+?)div\\ class=\\"[^u]+userContent\\"/i', '', $content);
                 $content = preg_replace('/(?i)><div class=\\"_4l5([^>]+)>(.+?)<\\/div>/i', '', $content);
                 //Remove html nodes, keep only img, links, basic formatting
                 $content = strip_tags($content, '<a><img><i><u>');
                 //Adapt link hrefs: convert relative links into absolute links and bypass external link redirection
                 $content = preg_replace_callback('/ href=\\"([^"]+)\\"/i', $unescape_fb_link, $content);
                 //Clean useless html tag properties and fix link closing tags
                 foreach (array('onmouseover', 'onclick', 'target', 'ajaxify', 'tabindex', 'class', 'style', 'data-[^=]*', 'aria-[^=]*', 'role', 'rel', 'id') as $property_name) {
                     $content = preg_replace('/ ' . $property_name . '=\\"[^"]*\\"/i', '', $content);
                 }
                 $content = preg_replace('/<\\/a [^>]+>/i', '</a>', $content);
                 //Convert textual representation of emoticons eg "<i><u>smile emoticon</u></i>" back to ASCII emoticons eg ":)"
                 $content = preg_replace_callback('/<i><u>([^ <>]+) ([^<>]+)<\\/u><\\/i>/i', $unescape_fb_emote, $content);
                 //Retrieve date of the post
                 $date = $post->find("abbr")[0];
                 if (isset($date) && $date->hasAttribute('data-utime')) {
                     $date = $date->getAttribute('data-utime');
                 } else {
                     $date = 0;
                 }
                 //Build title from username and content
                 //NOTE(review): when wordwrap() inserts no line break (text without spaces),
                 //strpos() returns false and the truncated text is reduced to '...' - confirm this is acceptable
                 $title = $author;
                 if (strlen($title) > 24) {
                     $title = substr($title, 0, strpos(wordwrap($title, 24), "\n")) . '...';
                 }
                 $title = $title . ' | ' . strip_tags($content);
                 if (strlen($title) > 64) {
                     $title = substr($title, 0, strpos(wordwrap($title, 64), "\n")) . '...';
                 }
                 //Use the post's second <img> (index 1) as thumbnail if available, or profile pic fallback.
                 //The node is checked before reading src: posts with fewer than two images yield null
                 $thumbnail = $post->find('img', 1);
                 if (is_object($thumbnail) && strlen($thumbnail->src) > 0) {
                     $thumbnail = $thumbnail->src;
                 } else {
                     $thumbnail = $profilePic;
                 }
                 //Build and add final item
                 $item->uri = 'https://facebook.com' . str_replace('&amp;', '&', $post->find('abbr')[0]->parent()->getAttribute('href'));
                 $item->thumbnailUri = $thumbnail;
                 $item->content = $content;
                 $item->title = $title;
                 $item->author = $author;
                 $item->timestamp = $date;
                 $this->items[] = $item;
             }
         }
     }
 }
Example #9
0
 /**
  * Build feed items for the latest releases listed in the 3dsdb.com XML dump.
  *
  * The XML is split on '<release>' and walked in reverse order; at most five
  * releases carrying a non-empty release name are turned into items. Each item
  * combines an optional IGN search blurb, the release details taken from the
  * XML, and a list of search-engine links for the release name.
  *
  * @param array $param Request parameters; not read by this method.
  */
 public function collectData(array $param)
 {
     //Nested named functions become global as soon as this method runs, so every
     //declaration is guarded: a second call would otherwise die with "Cannot redeclare".
     if (!function_exists('ExtractFromDelimiters')) {
         //Return the text between the first $start and the following $end, or false when $start is absent
         function ExtractFromDelimiters($string, $start, $end)
         {
             if (strpos($string, $start) !== false) {
                 $section_retrieved = substr($string, strpos($string, $start) + strlen($start));
                 $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end));
                 return $section_retrieved;
             }
             return false;
         }
     }
     if (!function_exists('TypeToString')) {
         //Translate the numeric <type> value into a label
         function TypeToString($type)
         {
             switch ($type) {
                 case 1:
                     return '3DS Game';
                 case 4:
                     return 'eShop';
                 default:
                     return '??? (' . $type . ')';
             }
         }
     }
     if (!function_exists('CardToString')) {
         //Translate the numeric <card> value into a label
         function CardToString($card)
         {
             switch ($card) {
                 case 1:
                     return 'Regular (CARD1)';
                 case 2:
                     return 'NAND (CARD2)';
                 default:
                     return '??? (' . $card . ')';
             }
         }
     }
     $dataUrl = 'http://3dsdb.com/xml.php';
     $xml = file_get_contents($dataUrl) or $this->returnError('Could not request 3dsdb: ' . $dataUrl, 500);
     $limit = 0;
     foreach (array_reverse(explode('<release>', $xml)) as $element) {
         //Stop as soon as enough items were produced
         if ($limit >= 5) {
             break;
         }
         //Skip fragments that do not hold a complete release entry
         if (strpos($element, '</release>') === false) {
             continue;
         }
         $id = ExtractFromDelimiters($element, '<id>', '</id>');
         $name = ExtractFromDelimiters($element, '<name>', '</name>');
         $publisher = ExtractFromDelimiters($element, '<publisher>', '</publisher>');
         $region = ExtractFromDelimiters($element, '<region>', '</region>');
         $group = ExtractFromDelimiters($element, '<group>', '</group>');
         $imagesize = ExtractFromDelimiters($element, '<imagesize>', '</imagesize>');
         $serial = ExtractFromDelimiters($element, '<serial>', '</serial>');
         $titleid = ExtractFromDelimiters($element, '<titleid>', '</titleid>');
         $imgcrc = ExtractFromDelimiters($element, '<imgcrc>', '</imgcrc>');
         $filename = ExtractFromDelimiters($element, '<filename>', '</filename>');
         $releasename = ExtractFromDelimiters($element, '<releasename>', '</releasename>');
         $trimmedsize = ExtractFromDelimiters($element, '<trimmedsize>', '</trimmedsize>');
         $firmware = ExtractFromDelimiters($element, '<firmware>', '</firmware>');
         $type = ExtractFromDelimiters($element, '<type>', '</type>');
         $card = ExtractFromDelimiters($element, '<card>', '</card>');
         if (empty($releasename)) {
             continue;
         }
         //Retrieve cover art and short desc from IGN, falling back to empty values
         $ignDescription = '';
         $ignLink = '';
         $ignDate = time();
         $ignCoverArt = '';
         $ignSearchUrl = 'http://www.ign.com/search?q=' . urlencode($name);
         $ignResult = file_get_html($ignSearchUrl);
         if ($ignResult) {
             //Every lookup may come back empty (no search hit), so check each node
             //before dereferencing it instead of chaining ->find() calls on null
             $ignMedia = $ignResult->find('div.search-item-media', 0);
             $ignImage = is_object($ignMedia) ? $ignMedia->find('img', 0) : null;
             $ignDescNode = $ignResult->find('div.search-item-description', 0);
             $ignSubTitle = $ignResult->find('div.search-item-sub-title', 0);
             $ignAnchor = is_object($ignSubTitle) ? $ignSubTitle->find('a', 1) : null;
             $ignDateNode = $ignResult->find('span.publish-date', 0);
             if (is_object($ignImage) && is_object($ignDescNode) && is_object($ignAnchor)) {
                 $ignCoverArt = $ignImage->src;
                 $ignDesc = $ignDescNode->plaintext;
                 $ignLink = $ignAnchor->href;
                 $ignDescription = '<div><img src="' . $ignCoverArt . '" /></div><div>' . $ignDesc . ' <a href="' . $ignLink . '">More at IGN</a></div>';
             }
             if (is_object($ignDateNode)) {
                 //Keep the time() fallback when the date cannot be parsed
                 $ignParsedDate = strtotime(trim($ignDateNode->plaintext));
                 if ($ignParsedDate !== false) {
                     $ignDate = $ignParsedDate;
                 }
             }
         }
         //Main section : Release description from 3DS database
         $releaseDescription = '<h3>Release Details</h3>'
             . '<b>Release ID: </b>' . $id . '<br />'
             . '<b>Game Name: </b>' . $name . '<br />'
             . '<b>Publisher: </b>' . $publisher . '<br />'
             . '<b>Region: </b>' . $region . '<br />'
             . '<b>Group: </b>' . $group . '<br />'
             . '<b>Image size: </b>' . (intval($imagesize) / 8) . 'MB<br />'
             . '<b>Serial: </b>' . $serial . '<br />'
             . '<b>Title ID: </b>' . $titleid . '<br />'
             . '<b>Image CRC: </b>' . $imgcrc . '<br />'
             . '<b>File Name: </b>' . $filename . '<br />'
             . '<b>Release Name: </b>' . $releasename . '<br />'
             . '<b>Trimmed size: </b>' . intval(intval($trimmedsize) / 1048576) . 'MB<br />'
             . '<b>Firmware: </b>' . $firmware . '<br />'
             . '<b>Type: </b>' . TypeToString($type) . '<br />'
             . '<b>Card: </b>' . CardToString($card) . '<br />';
         //Build search links section to facilitate release search using search engines.
         //urlencode() already turns spaces into '+'; replacing them beforehand made the
         //links search for a literal '+' (encoded as %2B).
         $releaseNameEncoded = urlencode($releasename);
         $searchLinkGoogle = 'https://google.com/?q=' . $releaseNameEncoded;
         $searchLinkDuckDuckGo = 'https://duckduckgo.com/?q=' . $releaseNameEncoded;
         $searchLinkQwant = 'https://lite.qwant.com/?q=' . $releaseNameEncoded . '&t=web';
         $releaseSearchLinks = '<h3>Search this release</h3><ul>'
             . '<li><a href="' . $searchLinkGoogle . '">Search using Google</a></li>'
             . '<li><a href="' . $searchLinkDuckDuckGo . '">Search using DuckDuckGo</a></li>'
             . '<li><a href="' . $searchLinkQwant . '">Search using Qwant</a></li>'
             . '</ul>';
         //Build and add final item with the above three sections
         $item = new \Item();
         $item->title = $name;
         $item->author = $publisher;
         $item->timestamp = $ignDate;
         $item->thumbnailUri = $ignCoverArt;
         $item->uri = empty($ignLink) ? $searchLinkDuckDuckGo : $ignLink;
         $item->content = $ignDescription . $releaseDescription . $releaseSearchLinks;
         $this->items[] = $item;
         $limit++;
     }
 }
Example #10
0
 /**
  * Build feed items from a ZDNet RSS feed, expanding each entry with the article body.
  *
  * For up to ten RSS entries the linked article is downloaded, share bars, galleries,
  * scripts and similar sections are stripped from it, and the cleaned markup becomes the
  * item content (prefixed with the thumbnail when the article does not start with an
  * image, and with the RSS description in bold).
  *
  * @param array $param Request parameters. 'feed' is the feed path; a 'downloads!' marker
  *                     inside it switches the host from 'www.' to 'downloads.'.
  */
 public function collectData(array $param)
 {
     //Nested named functions become global as soon as this method runs, so every
     //declaration is guarded: a second call would otherwise die with "Cannot redeclare".
     if (!function_exists('StripCDATA')) {
         //Remove CDATA markers and surrounding whitespace
         function StripCDATA($string)
         {
             $string = str_replace('<![CDATA[', '', $string);
             $string = str_replace(']]>', '', $string);
             return trim($string);
         }
     }
     if (!function_exists('ExtractFromDelimiters')) {
         //Return the text between the first $start and the following $end, or false when $start is absent
         function ExtractFromDelimiters($string, $start, $end)
         {
             if (strpos($string, $start) !== false) {
                 $section_retrieved = substr($string, strpos($string, $start) + strlen($start));
                 $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end));
                 return $section_retrieved;
             }
             return false;
         }
     }
     if (!function_exists('StripWithDelimiters')) {
         //Remove every section running from $start up to and including the next $end
         function StripWithDelimiters($string, $start, $end)
         {
             while (($start_pos = strpos($string, $start)) !== false) {
                 $section_to_remove = substr($string, $start_pos);
                 $end_pos = strpos($section_to_remove, $end);
                 if ($end_pos === false) {
                     //Unterminated section: stop rather than deleting unrelated characters
                     break;
                 }
                 $section_to_remove = substr($section_to_remove, 0, $end_pos + strlen($end));
                 $string = str_replace($section_to_remove, '', $string);
             }
             return $string;
         }
     }
     if (!function_exists('StripRecursiveHTMLSection')) {
         //Remove every element opened by $tag_start, including nested elements of the same tag name
         function StripRecursiveHTMLSection($string, $tag_name, $tag_start)
         {
             $open_tag = '<' . $tag_name;
             $close_tag = '</' . $tag_name . '>';
             $close_tag_length = strlen($close_tag);
             if (strpos($tag_start, $open_tag) === 0) {
                 while (($section_start = strpos($string, $tag_start)) !== false) {
                     $max_recursion = 100;
                     $section_to_remove = null;
                     $search_offset = $section_start;
                     //Extend the section one closing tag at a time until opening and closing tags balance
                     do {
                         $max_recursion--;
                         $section_end = strpos($string, $close_tag, $search_offset);
                         if ($section_end === false) {
                             //Unbalanced markup: no closing tag left, give up instead of looping forever
                             return $string;
                         }
                         $search_offset = $section_end + $close_tag_length;
                         $section_to_remove = substr($string, $section_start, $section_end - $section_start + $close_tag_length);
                         $open_tag_count = substr_count($section_to_remove, $open_tag);
                         $close_tag_count = substr_count($section_to_remove, $close_tag);
                     } while ($open_tag_count > $close_tag_count && $max_recursion > 0);
                     $string = str_replace($section_to_remove, '', $string);
                 }
             }
             return $string;
         }
     }
     $baseUri = $this->getURI();
     $feed = isset($param['feed']) ? $param['feed'] : '';
     if (empty($feed)) {
         $this->returnError('Please select a feed to display.', 400);
     }
     if (strpos($feed, 'downloads!') !== false) {
         $feed = str_replace('downloads!', '', $feed);
         $baseUri = str_replace('www.', 'downloads.', $baseUri);
     }
     //Only letters, digits, '-' and at most one '/' are accepted, within 64 characters.
     //The length test used to read strlen($feed > 64), which measured the result of a
     //comparison instead of the feed itself.
     $sanitizedFeed = preg_replace('/[^a-zA-Z0-9-\\/]+/', '', $feed);
     if ($feed !== $sanitizedFeed || substr_count($feed, '/') > 1 || strlen($feed) > 64) {
         $this->returnError('Invalid "feed" parameter.', 400);
     }
     $url = $baseUri . trim($feed, '/') . '/rss.xml';
     $html = $this->file_get_html($url) or $this->returnError('Could not request ZDNet: ' . $url, 500);
     $limit = 0;
     foreach ($html->find('item') as $element) {
         //Stop as soon as enough items were produced
         if ($limit >= 10) {
             break;
         }
         //Drop the '#ftag=...' tracking fragment from the article link
         $article_url = preg_replace('/([^#]+)#ftag=.*/', '$1', StripCDATA(ExtractFromDelimiters($element->innertext, '<link>', '</link>')));
         $article_author = StripCDATA(ExtractFromDelimiters($element->innertext, 'role="author">', '<'));
         $article_title = StripCDATA($element->find('title', 0)->plaintext);
         $article_subtitle = StripCDATA($element->find('description', 0)->plaintext);
         $article_timestamp = strtotime(StripCDATA($element->find('pubDate', 0)->plaintext));
         $article = $this->file_get_html($article_url) or $this->returnError('Could not request ZDNet: ' . $article_url, 500);
         //Author: RSS entry first, then the article's meta tag, then a generic fallback
         if (!empty($article_author)) {
             $author = $article_author;
         } else {
             $author = $article->find('meta[name=author]', 0);
             $author = is_object($author) ? $author->content : 'ZDNet';
         }
         $thumbnail = $article->find('meta[itemprop=image]', 0);
         $thumbnail = is_object($thumbnail) ? $thumbnail->content : '';
         //An article page without an <article> element yields empty contents instead of a null dereference
         $article_body = $article->find('article', 0);
         $contents = is_object($article_body) ? $article_body->innertext : '';
         foreach (array('<div class="shareBar"', '<div class="shortcodeGalleryWrapper"', '<div class="relatedContent', '<div class="downloadNow', '<div data-shortcode', '<div id="sharethrough', '<div id="inpage-video') as $div_start) {
             $contents = StripRecursiveHTMLSection($contents, 'div', $div_start);
         }
         $contents = StripWithDelimiters($contents, '<script', '</script>');
         $contents = StripWithDelimiters($contents, '<meta itemprop="image"', '>');
         $contents = trim(StripWithDelimiters($contents, '<section class="sharethrough-top', '</section>'));
         //Look for first image: the thumbnail is only prepended when the article does not
         //already show an image within its first 512 bytes and a thumbnail is known
         $content_img = strpos($contents, '<img');
         if (($content_img !== false && $content_img < 512) || $thumbnail == '') {
             $content_img = '';
         } else {
             $content_img = '<p><img src="' . $thumbnail . '" /></p>';
         }
         //Include thumbnail
         $contents = $content_img . '<p><b>' . $article_subtitle . '</b></p>' . $contents;
         if ($thumbnail == '') {
             $thumbnail = 'http://zdnet1.cbsistatic.com/fly/bundles/zdnetcss/images/logos/logo-192x192.png';
         }
         $item = new \Item();
         $item->author = $author;
         $item->uri = $article_url;
         $item->title = $article_title;
         $item->thumbnailUri = $thumbnail;
         $item->timestamp = $article_timestamp;
         $item->content = $contents;
         $this->items[] = $item;
         $limit++;
     }
 }
Example #11
0
 /**
  * Build feed items from a t411 torrent search, sorted by date added (descending).
  *
  * The search result table only lists torrents, so each of the first ten rows is
  * followed to its details page to retrieve the description, uploader and an image.
  * Requests to detail pages are spaced by 500ms.
  *
  * @param array $param Request parameters; 'search' is required and is appended to the
  *                     search URL as-is.
  */
 public function collectData(array $param)
 {
     //Utility function for retrieving text based on start and end delimiters.
     //Nested named functions become global once this method runs, so the declaration is
     //guarded: a second call would otherwise die with "Cannot redeclare".
     if (!function_exists('ExtractFromDelimiters')) {
         function ExtractFromDelimiters($string, $start, $end)
         {
             if (strpos($string, $start) !== false) {
                 $section_retrieved = substr($string, strpos($string, $start) + strlen($start));
                 $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end));
                 return $section_retrieved;
             }
             return false;
         }
     }
     //Ensure proper parameters have been provided
     if (empty($param['search'])) {
         $this->returnError('You must specify a search criteria', 400);
     }
     //Retrieve torrent listing from search results, which does not contain torrent description
     //NOTE(review): 'search' is concatenated without a key or urlencode(); presumably callers
     //pass a ready-made query fragment - confirm before changing the URL format
     $url = $this->getURI() . 'torrents/search/?' . $param['search'] . '&order=added&type=desc';
     $html = $this->file_get_html($url) or $this->returnError('Could not request t411: ' . $url, 500);
     $results = $html->find('table.results', 0);
     if (is_null($results)) {
         $this->returnError('No results from t411: ' . $url, 500);
     }
     $limit = 0;
     //Process each item individually
     foreach ($results->find('tr') as $element) {
         //Limit total amount of requests
         if ($limit >= 10) {
             break;
         }
         //Ignore table header
         if (is_object($element->find('th', 0))) {
             continue;
         }
         //A row without details link cannot be resolved to a torrent page: skip it
         //before spending a delay and a request on it
         $nfo_link = $element->find('a.nfo', 0);
         if (!is_object($nfo_link)) {
             continue;
         }
         //Requests are rate-limited, so we need to wait (500ms)
         usleep(500000);
         //Retrieve data from search result row
         $item_uri = $this->getURI() . 'torrents/details/?id=' . ExtractFromDelimiters($nfo_link->outertext, '?id=', '"');
         $item_title = ExtractFromDelimiters($element->outertext, '" title="', '"');
         $date_node = $element->find('dd', 0);
         $item_date = is_object($date_node) ? strtotime($date_node->plaintext) : false;
         //Retrieve full description from torrent page
         $item_html = $this->file_get_html($item_uri);
         if (!$item_html) {
             continue;
         }
         //Retrieve data from page contents; a page without description block is skipped
         //instead of calling find() on null
         $item_desc = $item_html->find('div.description', 0);
         if (!is_object($item_desc)) {
             continue;
         }
         $author_node = $item_html->find('a.profile', 0);
         $item_author = is_object($author_node) ? $author_node->innertext : '';
         //Retrieve image for thumbnail or generic logo fallback
         $item_image = $this->getURI() . 'themes/blue/images/logo.png';
         foreach ($item_desc->find('img') as $img) {
             if (strpos($img->src, 'prez') === false && strpos($img->src, '/ad/') === false) {
                 $item_image = $img->src;
                 break;
             }
         }
         //Cleanup advertisments: drop every centered block that references the ad provider
         $opening_div = '<div class="align-center">';
         $divs = explode($opening_div, $item_desc->innertext);
         $item_desc = '';
         foreach ($divs as $index => $text) {
             if (strpos($text, 'adprovider.adlure.net') !== false) {
                 continue;
             }
             //The first fragment is the text located before any centered block, so it
             //must not get an opening tag that was never there
             $item_desc .= ($index > 0 ? $opening_div : '') . $text;
         }
         $item_desc = preg_replace('/<h2 class="align-center">LIENS DE T..?L..?CHARGEMENT<\\/h2>/i', '', $item_desc);
         //Build and add final item
         $item = new \Item();
         $item->uri = $item_uri;
         $item->title = $item_title;
         $item->author = $item_author;
         $item->timestamp = $item_date;
         $item->thumbnailUri = $item_image;
         $item->content = $item_desc;
         $this->items[] = $item;
         $limit++;
     }
 }
Example #12
0
 /**
  * Build feed items from the GBAtemp portal page for one content type.
  *
  * @param array $param Request parameters; 'type' is required and must be one of
  *                     'N' (news), 'R' (reviews), 'T' (tutorials) or 'F' (forum posts).
  */
 public function collectData(array $param)
 {
     //Nested named functions become global as soon as this method runs, so every
     //declaration is guarded: a second call would otherwise die with "Cannot redeclare".
     if (!function_exists('ExtractFromDelimiters')) {
         //Return the text between the first $start and the following $end, or false when $start is absent
         function ExtractFromDelimiters($string, $start, $end)
         {
             if (strpos($string, $start) !== false) {
                 $section_retrieved = substr($string, strpos($string, $start) + strlen($start));
                 $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end));
                 return $section_retrieved;
             }
             return false;
         }
     }
     if (!function_exists('StripWithDelimiters')) {
         //Remove every section running from $start up to and including the next $end
         function StripWithDelimiters($string, $start, $end)
         {
             while (strpos($string, $start) !== false) {
                 $section_to_remove = substr($string, strpos($string, $start));
                 $section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end));
                 $string = str_replace($section_to_remove, '', $string);
             }
             return $string;
         }
     }
     if (!function_exists('build_item')) {
         //Assemble an Item from its individual fields
         function build_item($uri, $title, $author, $timestamp, $thumnail, $content)
         {
             $item = new \Item();
             $item->uri = $uri;
             $item->title = $title;
             $item->author = $author;
             $item->timestamp = $timestamp;
             $item->thumbnailUri = $thumnail;
             $item->content = $content;
             return $item;
         }
     }
     if (!function_exists('cleanup_post_content')) {
         //Replace the ':arrow:' marker, make attachment links absolute and drop scripts
         function cleanup_post_content($content, $site_url)
         {
             $content = str_replace(':arrow:', '&#x27a4;', $content);
             $content = str_replace('href="attachments/', 'href="' . $site_url . 'attachments/', $content);
             $content = StripWithDelimiters($content, '<script', '</script>');
             return $content;
         }
     }
     //Download a post page and return its cleaned first message.
     //This has to be a closure: it needs $this, which a nested plain function cannot
     //access ("Using $this when not in object context").
     $fetch_post_content = function ($uri, $site_url) {
         $html = $this->file_get_html($uri) or $this->returnError('Could not request GBAtemp: ' . $uri, 500);
         $message = $html->find('div.messageContent', 0);
         return cleanup_post_content(is_object($message) ? $message->innertext : '', $site_url);
     };
     //Validate the type filter and remember its display name
     $filter_names = array('N' => 'News', 'R' => 'Review', 'T' => 'Tutorial', 'F' => 'Forum');
     $typeFilter = '';
     if (!empty($param['type'])) {
         if (in_array($param['type'], array_keys($filter_names), true)) {
             $typeFilter = $param['type'];
             $this->filter = $filter_names[$typeFilter];
         } else {
             $this->returnError('The provided type filter is invalid. Expecting N, R, T, or F.', 400);
         }
     } else {
         $this->returnError('Please provide a type filter. Expecting N, R, T, or F.', 400);
     }
     $html = $this->file_get_html($this->getURI()) or $this->returnError('Could not request GBAtemp.', 500);
     switch ($typeFilter) {
         case 'N':
             //News: content comes from the linked post
             foreach ($html->find('li[class=news_item full]') as $newsItem) {
                 $url = $this->getURI() . $newsItem->find('a', 0)->href;
                 $img = $this->getURI() . $newsItem->find('img', 0)->src;
                 $time = intval(ExtractFromDelimiters($newsItem->find('abbr.DateTime', 0)->outertext, 'data-time="', '"'));
                 $author = $newsItem->find('a.username', 0)->plaintext;
                 $title = $newsItem->find('a', 1)->plaintext;
                 $content = $fetch_post_content($url, $this->getURI());
                 $this->items[] = build_item($url, $title, $author, $time, $img, $content);
             }
             break;
         case 'R':
             //Reviews: author, date and content are assembled from several parts of the review page
             foreach ($html->find('li.portal_review') as $reviewItem) {
                 $url = $this->getURI() . $reviewItem->find('a', 0)->href;
                 $img = $this->getURI() . ExtractFromDelimiters($reviewItem->find('a', 0)->style, 'image:url(', ')');
                 $title = $reviewItem->find('span.review_title', 0)->plaintext;
                 //The error message used to reference an undefined $uri variable
                 $reviewPage = $this->file_get_html($url) or $this->returnError('Could not request GBAtemp: ' . $url, 500);
                 $author = $reviewPage->find('a.username', 0)->plaintext;
                 $time = intval(ExtractFromDelimiters($reviewPage->find('abbr.DateTime', 0)->outertext, 'data-time="', '"'));
                 $intro = '<p><b>' . $reviewPage->find('div#review_intro', 0)->plaintext . '</b></p>';
                 $review = $reviewPage->find('div#review_main', 0)->innertext;
                 $subheader = '<p><b>' . $reviewPage->find('div.review_subheader', 0)->plaintext . '</b></p>';
                 $procons = $reviewPage->find('table.review_procons', 0)->outertext;
                 $scores = $reviewPage->find('table.reviewscores', 0)->outertext;
                 $content = cleanup_post_content($intro . $review . $subheader . $procons . $scores, $this->getURI());
                 $this->items[] = build_item($url, $title, $author, $time, $img, $content);
             }
             break;
         case 'T':
             //Tutorials: no thumbnail, content comes from the linked post
             foreach ($html->find('li.portal-tutorial') as $tutorialItem) {
                 $url = $this->getURI() . $tutorialItem->find('a', 0)->href;
                 $title = $tutorialItem->find('a', 0)->plaintext;
                 $time = intval(ExtractFromDelimiters($tutorialItem->find('abbr.DateTime', 0)->outertext, 'data-time="', '"'));
                 $author = $tutorialItem->find('a.username', 0)->plaintext;
                 $content = $fetch_post_content($url, $this->getURI());
                 $this->items[] = build_item($url, $title, $author, $time, '', $content);
             }
             break;
         case 'F':
             //Forum posts: the second link of the entry points to the thread
             foreach ($html->find('li.rc_item') as $postItem) {
                 $url = $this->getURI() . $postItem->find('a', 1)->href;
                 $title = $postItem->find('a', 1)->plaintext;
                 $time = intval(ExtractFromDelimiters($postItem->find('abbr.DateTime', 0)->outertext, 'data-time="', '"'));
                 $author = $postItem->find('a.username', 0)->plaintext;
                 $content = $fetch_post_content($url, $this->getURI());
                 $this->items[] = build_item($url, $title, $author, $time, '', $content);
             }
             break;
     }
 }