/**
 * Scrape the timeline of a Facebook user/page and append one item per post to $this->items.
 *
 * Flow:
 *  1. If the viewer just submitted a captcha answer ($_POST['captcha_response']), replay the
 *     captcha form stored in the session against Facebook and use the response as page content.
 *  2. Otherwise fetch the page for $param['u'] (the noscript variant of the page).
 *  3. If the fetched page contains a captcha interstitial, store its form in the session,
 *     print a proxy form to the viewer and terminate the script with die().
 *  4. Otherwise walk the timeline markup and build an item (uri, thumbnail, content, title,
 *     author, timestamp) for every post that contains an <abbr> element.
 *
 * Side effects: may start a PHP session, reads/writes $_SESSION['captcha_fields'] and
 * $_SESSION['captcha_action'], sets $this->name, appends to $this->items, and may send
 * headers and die() when a captcha has to be shown.
 *
 * @param array $param Request parameters; 'u' is the Facebook username, or a path containing
 *                     a "/" which is looked up below https://www.facebook.com/pages/.
 * @return void
 */
public function collectData(array $param) {

    //Extract a string using start and end delimiters.
    //This is a closure rather than a named function: a named function declared inside this
    //method would be declared again (fatal error) if collectData() ran twice in one process.
    $extract_from_delimiters = function ($string, $start, $end) {
        $start_pos = strpos($string, $start);
        if ($start_pos === false) {
            return false;
        }
        $section_retrieved = (string) substr($string, $start_pos + strlen($start));
        $end_pos = strpos($section_retrieved, $end);
        //No end delimiter: keep everything after the start delimiter instead of returning ''
        if ($end_pos === false) {
            return $section_retrieved;
        }
        return substr($section_retrieved, 0, $end_pos);
    };

    //Utility function for cleaning a Facebook link
    $unescape_fb_link = function ($matches) use ($extract_from_delimiters) {
        if (!is_array($matches) || count($matches) < 2) {
            return '';
        }
        $link = $matches[1];
        //Relative link: make it absolute
        if (strpos($link, '/') === 0) {
            $link = 'https://www.facebook.com' . $link;
        }
        //External link wrapped in Facebook's redirector: unwrap the real target from "u="
        if (strpos($link, 'facebook.com/l.php?u=') !== false) {
            $link = urldecode($extract_from_delimiters($link, 'facebook.com/l.php?u=', '&'));
        }
        return ' href="' . $link . '"';
    };

    //Utility function for converting facebook emoticons
    $unescape_fb_emote = function ($matches) {
        static $facebook_emoticons = array(
            'smile' => ':)',
            'frown' => ':(',
            'tongue' => ':P',
            'grin' => ':D',
            'gasp' => ':O',
            'wink' => ';)',
            'pacman' => ':<',
            'grumpy' => '>_<',
            'unsure' => ':/',
            'cry' => ':\'(',
            'kiki' => '^_^',
            'glasses' => '8-)',
            'sunglasses' => 'B-)',
            'heart' => '<3',
            'devil' => ']:D',
            'angel' => '0:)',
            'squint' => '-_-',
            'confused' => 'o_O',
            'upset' => 'xD',
            'colonthree' => ':3',
            'like' => '👍',
        );
        //Try every captured group in order; the first one naming a known emoticon wins
        $len = count($matches);
        for ($i = 1; $i < $len; $i++) {
            if (isset($facebook_emoticons[$matches[$i]])) {
                return $facebook_emoticons[$matches[$i]];
            }
        }
        //Unknown emoticon: leave the matched markup untouched
        return $matches[0];
    };

    //Utility function shortening a text at a word boundary and appending '...'
    $truncate = function ($text, $width) {
        if (strlen($text) <= $width) {
            return $text;
        }
        $break = strpos(wordwrap($text, $width), "\n");
        if ($break === false) {
            //wordwrap() found no place to break (text without whitespace): cut hard at $width
            //bytes, on a character boundary when mbstring is available
            $head = function_exists('mb_strcut')
                ? mb_strcut($text, 0, $width, 'UTF-8')
                : substr($text, 0, $width);
            return $head . '...';
        }
        return substr($text, 0, $break) . '...';
    };

    $html = null;

    //Handle captcha response sent by the viewer
    if (isset($_POST['captcha_response'])) {
        if (session_status() == PHP_SESSION_NONE) {
            session_start();
        }
        if (isset($_SESSION['captcha_fields'], $_SESSION['captcha_action'])) {
            $captcha_action = $_SESSION['captcha_action'];
            $captcha_fields = $_SESSION['captcha_fields'];
            //Only letters and digits of the viewer's answer are forwarded
            $captcha_fields['captcha_response'] = preg_replace("/[^a-zA-Z0-9]+/", "", $_POST['captcha_response']);
            $http_options = array(
                'http' => array(
                    'method' => 'POST',
                    'user_agent' => ini_get('user_agent'),
                    'header' => array("Content-type: application/x-www-form-urlencoded\r\nReferer: {$captcha_action}\r\nCookie: noscript=1\r\n"),
                    'content' => http_build_query($captcha_fields),
                ),
            );
            $context = stream_context_create($http_options);
            $html = file_get_contents($captcha_action, false, $context);
            if ($html === false) {
                $this->returnError('Failed to submit captcha response back to Facebook', 500);
            }
            $html = str_get_html($html);
        }
        //The stored form is single-use, whether or not it could be replayed
        unset($_SESSION['captcha_fields'], $_SESSION['captcha_action']);
    }

    //Retrieve page contents
    if (is_null($html)) {
        if (isset($param['u'])) {
            //Strict comparison: strpos() returns 0 (falsy) for a slash in first position
            if (strpos($param['u'], "/") === false) {
                $html = $this->file_get_html('https://www.facebook.com/' . urlencode($param['u']) . '?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
            } else {
                $html = $this->file_get_html('https://www.facebook.com/pages/' . $param['u'] . '?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
            }
        } else {
            $this->returnError('You must specify a Facebook username.', 400);
        }
    }

    //Handle captcha form?
    $captcha = $html->find('div.captcha_interstitial', 0);
    if (!is_null($captcha)) {

        //Save form for submitting after getting captcha response
        if (session_status() == PHP_SESSION_NONE) {
            session_start();
        }
        $captcha_fields = array();
        foreach ($captcha->find('input, button') as $input) {
            $captcha_fields[$input->name] = $input->value;
        }
        $_SESSION['captcha_fields'] = $captcha_fields;
        $_SESSION['captcha_action'] = 'https://www.facebook.com' . $captcha->find('form', 0)->action;

        //Show captcha filling form to the viewer, proxying the captcha image
        $img = base64_encode(file_get_contents($captcha->find('img', 0)->src));
        //The query string is viewer-controlled: escape it before echoing it into an attribute
        $query_string = htmlspecialchars($_SERVER['QUERY_STRING'], ENT_QUOTES, 'UTF-8');
        header('HTTP/1.1 500 ' . Http::getMessageForCode(500));
        header('Content-Type: text/html');
        die('<form method="post" action="?' . $query_string . '">'
            . '<h2>Facebook captcha challenge</h2>'
            . '<p>Unfortunately, rss-bridge cannot fetch the requested page.<br />'
            . 'Facebook wants rss-bridge to resolve the following captcha:</p>'
            . '<p><img src="data:image/png;base64,' . $img . '" /></p>'
            . '<p><b>Response:</b> <input name="captcha_response" placeholder="please fill in" />'
            . '<input type="submit" value="Submit!" /></p>'
            . '</form>');
    }

    //No captcha? We can carry on retrieving page contents :)
    //Walk down to the node holding the posts one step at a time, so that an unexpected page
    //structure yields an empty feed instead of a "member function on null" fatal error.
    $element = $html->find('#pagelet_timeline_main_column', 0);
    foreach (array('children', 'children', 'children', 'next_sibling', 'children') as $step) {
        if (!is_object($element)) {
            break;
        }
        $element = ($step === 'children') ? $element->children(0) : $element->next_sibling();
    }

    if (is_object($element)) {

        $author = str_replace(' | Facebook', '', $html->find('title#pageTitle', 0)->innertext);
        $profilePic = 'https://graph.facebook.com/' . $param['u'] . '/picture?width=200&height=200';
        $this->name = $author;

        foreach ($element->children() as $post) {

            //Only nodes carrying an <abbr> (the post date) are treated as posts
            $abbrs = $post->find('abbr');
            if (count($abbrs) == 0) {
                continue;
            }
            $abbr = $abbrs[0];

            //Retrieve post contents
            $content = preg_replace('/(?i)><div class=\\"clearfix([^>]+)>(.+?)div\\ class=\\"userContent\\"/i', '', $post);
            $content = preg_replace('/(?i)><div class=\\"_59tj([^>]+)>(.+?)<\\/div><\\/div><a/i', '', $content);
            $content = preg_replace('/(?i)><div class=\\"_3dp([^>]+)>(.+?)div\\ class=\\"[^u]+userContent\\"/i', '', $content);
            $content = preg_replace('/(?i)><div class=\\"_4l5([^>]+)>(.+?)<\\/div>/i', '', $content);

            //Remove html nodes, keep only img, links, basic formatting
            $content = strip_tags($content, '<a><img><i><u>');

            //Adapt link hrefs: convert relative links into absolute links and bypass external link redirection
            $content = preg_replace_callback('/ href=\\"([^"]+)\\"/i', $unescape_fb_link, $content);

            //Clean useless html tag properties and fix link closing tags
            foreach (array('onmouseover', 'onclick', 'target', 'ajaxify', 'tabindex', 'class', 'style', 'data-[^=]*', 'aria-[^=]*', 'role', 'rel', 'id') as $property_name) {
                $content = preg_replace('/ ' . $property_name . '=\\"[^"]*\\"/i', '', $content);
            }
            $content = preg_replace('/<\\/a [^>]+>/i', '</a>', $content);

            //Convert textual representation of emoticons eg "<i><u>smile emoticon</u></i>" back to ASCII emoticons eg ":)"
            $content = preg_replace_callback('/<i><u>([^ <>]+) ([^<>]+)<\\/u><\\/i>/i', $unescape_fb_emote, $content);

            //Retrieve date of the post
            $date = $abbr->hasAttribute('data-utime') ? $abbr->getAttribute('data-utime') : 0;

            //Build title from username and content
            $title = $truncate($author, 24) . ' | ' . strip_tags($content);
            $title = $truncate($title, 64);

            //Use the post's second <img> (index 1) as thumbnail if available, or profile pic fallback
            //NOTE(review): presumably index 0 is the author's avatar - confirm against live markup
            $thumbnail = $post->find('img', 1);
            if (is_object($thumbnail)) {
                $thumbnail = $thumbnail->src;
            } else {
                $thumbnail = $profilePic;
            }

            //Build and add final item
            $item = new \Item();
            $item->uri = 'https://facebook.com' . $abbr->parent()->getAttribute('href');
            $item->thumbnailUri = $thumbnail;
            $item->content = $content;
            $item->title = $title;
            $item->author = $author;
            $item->timestamp = $date;
            $this->items[] = $item;
        }
    }
}
// NOTE(review): this excerpt starts inside blocks (a try and further nested blocks) that are
// opened before the visible part of the file; the lone closing braces below belong to them.

// The cache is attached to the bridge unless the request carries a "disable_cache" parameter.
if (isset($_REQUEST["disable_cache"])) {
} else {
    $bridge->setCache($cache); // just add disable cache to your query to disable caching
}
// Hand all request parameters to the bridge.
$bridge->setDatas($_REQUEST);

// Data transformation
// $format initially holds the value passed to Format::create() and is replaced by its result.
$format = Format::create($format);
// Feed the bridge's data plus its name and URI to the format object and output the result.
$format->setDatas($bridge->getDatas())->setExtraInfos(array('name' => $bridge->getName(), 'uri' => $bridge->getURI()))->display();
// Stop the script once the feed has been displayed.
die;
}
break;
}
}
} catch (HttpException $e) {
    // HTTP-level failure: the exception code is used as the HTTP status code and the
    // exception message is sent as a plain-text body.
    header('HTTP/1.1 ' . $e->getCode() . ' ' . Http::getMessageForCode($e->getCode()));
    header('Content-Type: text/plain');
    die($e->getMessage());
} catch (\Exception $e) {
    // Any other exception: print its message and stop (no status header is set here).
    die($e->getMessage());
}

/**
 * Build the HTML of one submit button named "format".
 *
 * Both arguments are concatenated into the markup as-is (no HTML escaping is done here).
 *
 * @param string $value Value of the button's "value" attribute
 * @param string $name  Text shown inside the button
 * @return string The <button> element
 */
function getHelperButtonFormat($value, $name) {
    return '<button type="submit" name="format" value="' . $value . '">' . $name . '</button>';
}

/**
 * Build the HTML of the format buttons, one per entry of $formats that has a 'name' key.
 *
 * NOTE(review): the end of this function lies beyond the visible part of the file.
 *
 * @param array $formats Map of format identifier => info array; the identifier becomes the
 *                       button value and $infos['name'] the button text
 */
function getHelperButtonsFormat($formats) {
    $buttons = '';
    foreach ($formats as $name => $infos) {
        // Entries without a 'name' are skipped
        if (isset($infos['name'])) {
            // One button per line
            $buttons .= getHelperButtonFormat($name, $infos['name']) . PHP_EOL;