Ejemplo n.º 1
0
 /**
  * Parse a Vimeo RSS feed and enrich every item with Vimeo API data.
  *
  * Runs the parent parser, fills in the feed author from the feed title when
  * no author is set, then for each item extracts the video ID from the item
  * link and requests that video from the Vimeo v2 API. When the request
  * succeeds and yields a record, the view count, like count, duration,
  * thumbnails and every other non-empty field of the record are merged into
  * the item.
  *
  * @return void
  */
 public function parse()
 {
     parent::parse();

     // Fall back to the feed title, minus its "Vimeo / " prefix, as the author.
     // isset() covers both a missing key and an explicit null.
     if (!isset($this->feed['author'])) {
         $this->feed['author'] = str_replace('Vimeo / ', '', $this->feed['title']);
     }

     $items = $this->feed['items'];
     foreach ($items as $key => $item) {
         // The video ID is whatever follows the last slash of the item link.
         $id = substr($item['link'], strrpos($item['link'], '/') + 1);
         $items[$key]['id'] = $id;

         $vimeo = \Pop\Http\Response::parse('http://vimeo.com/api/v2/video/' . $id . '.php');
         if ($vimeo->isError()) {
             continue;
         }

         // NOTE(review): the payload comes from a remote server. Object
         // instantiation is disabled so a hostile body cannot trigger object
         // injection (the options argument requires PHP 7.0+).
         $info = unserialize($vimeo->getBody(), ['allowed_classes' => false]);
         if (!isset($info[0]) || !is_array($info[0])) {
             continue;
         }

         $video = $info[0];

         // Normalised fields; any key the API omitted becomes null instead of
         // raising an undefined-index notice.
         $items[$key]['views'] = isset($video['stats_number_of_plays']) ? $video['stats_number_of_plays'] : null;
         $items[$key]['likes'] = isset($video['stats_number_of_likes']) ? $video['stats_number_of_likes'] : null;
         $items[$key]['duration'] = isset($video['duration']) ? $video['duration'] : null;
         $items[$key]['image_thumb'] = isset($video['thumbnail_small']) ? $video['thumbnail_small'] : null;
         $items[$key]['image_medium'] = isset($video['thumbnail_medium']) ? $video['thumbnail_medium'] : null;
         $items[$key]['image_large'] = isset($video['thumbnail_large']) ? $video['thumbnail_large'] : null;

         // Copy every remaining non-empty raw field onto the item as well.
         foreach ($video as $k => $v) {
             if ($v != '') {
                 $items[$key][$k] = $v;
             }
         }
     }

     $this->feed['items'] = $items;
 }
Ejemplo n.º 2
0
 /**
  * Parse a YouTube Atom feed and enrich every item with YouTube API data.
  *
  * Runs the parent parser, then for each item: uses the title as content
  * when the content is empty, extracts the video ID from the "v=" query
  * parameter of the item link, and requests that video from the GData API.
  * When the request succeeds, the view count, like count, duration and
  * thumbnail URLs are stored on the item, followed by every non-empty
  * top-level key of the decoded response.
  *
  * @return void
  */
 public function parse()
 {
     parent::parse();

     $items = $this->feed['items'];
     foreach ($items as $key => $item) {
         if (!isset($item['content']) || $item['content'] == '') {
             $items[$key]['content'] = $item['title'];
         }

         // The ID is everything after "v=" up to the next "&", if any.
         $id = substr($item['link'], strpos($item['link'], 'v=') + 2);
         $ampersand = strpos($id, '&');
         if ($ampersand !== false) {
             $id = substr($id, 0, $ampersand);
         }
         $items[$key]['id'] = $id;

         $youtube = \Pop\Http\Response::parse('http://gdata.youtube.com/feeds/api/videos/' . $id . '?v=2&alt=json');
         if ($youtube->isError()) {
             continue;
         }

         // json_decode() returns null for a malformed body; treat that as an
         // empty response so the lookups and the loop below stay valid.
         $info = json_decode($youtube->getBody(), true);
         if (!is_array($info)) {
             $info = [];
         }
         $entry = (isset($info['entry']) && is_array($info['entry'])) ? $info['entry'] : [];

         // Any of these sections may be absent from the response; fall back
         // to null rather than reading an undefined index.
         $items[$key]['views'] = isset($entry['yt$statistics']['viewCount'])
             ? $entry['yt$statistics']['viewCount']
             : null;
         $items[$key]['likes'] = isset($entry['yt$rating']['numLikes'])
             ? $entry['yt$rating']['numLikes']
             : null;
         $items[$key]['duration'] = isset($entry['media$group']['yt$duration']['seconds'])
             ? $entry['media$group']['yt$duration']['seconds']
             : null;

         $items[$key]['image_thumb'] = 'http://i.ytimg.com/vi/' . $id . '/default.jpg';
         $items[$key]['image_medium'] = 'http://i.ytimg.com/vi/' . $id . '/mqdefault.jpg';
         $items[$key]['image_large'] = 'http://i.ytimg.com/vi/' . $id . '/hqdefault.jpg';

         // Keep the non-empty top-level keys of the raw response on the item.
         foreach ($info as $k => $v) {
             if ($v != '') {
                 $items[$key][$k] = $v;
             }
         }
     }

     $this->feed['items'] = $items;
 }
Ejemplo n.º 3
0
 /**
  * Parse a YouTube JSON feed and enrich every item with YouTube API data.
  *
  * Runs the parent parser, flattens the feed-level fields (title, url,
  * description, date, generator, author) out of their JSON wrappers, then
  * for each item rebuilds title, content, link, published date and time from
  * the raw feed entry, extracts the video ID from the "v=" query parameter
  * of the link, and requests that video from the GData API. When the request
  * succeeds, the view count, like count, duration and thumbnail URLs are
  * stored on the item, followed by every non-empty top-level key of the
  * decoded response.
  *
  * @return void
  */
 public function parse()
 {
     parent::parse();

     // Unwrap the feed-level values from their "$t" / "href" containers.
     $this->feed['title'] = $this->feed['title']['$t'];
     $this->feed['url'] = $this->feed['url'][0]['href'];
     $this->feed['description'] = $this->feed['title'];
     $this->feed['date'] = $this->feed['date']['$t'];
     $this->feed['generator'] = $this->feed['generator']['$t'];
     $this->feed['author'] = $this->feed['author'][0]['name']['$t'];

     $items = $this->feed['items'];
     foreach ($items as $key => $item) {
         // Raw feed entry that corresponds to this item.
         $feedEntry = $this->obj['feed']['entry'][$key];
         $title = $feedEntry['title']['$t'];
         $published = $feedEntry['published']['$t'];

         // Prefer the decoded entry content; fall back to the title.
         $content = isset($feedEntry['content']['$t'])
             ? html_entity_decode($feedEntry['content']['$t'], ENT_QUOTES, 'UTF-8')
             : $title;

         $items[$key]['title'] = $title;
         $items[$key]['content'] = $content;
         $items[$key]['link'] = $items[$key]['link'][0]['href'];
         $items[$key]['published'] = $published;
         $items[$key]['time'] = self::calculateTime($published);

         // The ID is everything after "v=" up to the next "&", if any.
         $link = $items[$key]['link'];
         $id = substr($link, strpos($link, 'v=') + 2);
         $ampersand = strpos($id, '&');
         if ($ampersand !== false) {
             $id = substr($id, 0, $ampersand);
         }
         $items[$key]['id'] = $id;

         $youtube = \Pop\Http\Response::parse('http://gdata.youtube.com/feeds/api/videos/' . $id . '?v=2&alt=json');
         if ($youtube->isError()) {
             continue;
         }

         // json_decode() returns null for a malformed body; treat that as an
         // empty response so the lookups and the loop below stay valid.
         $info = json_decode($youtube->getBody(), true);
         if (!is_array($info)) {
             $info = [];
         }
         $apiEntry = (isset($info['entry']) && is_array($info['entry'])) ? $info['entry'] : [];

         // Any of these sections may be absent from the response; fall back
         // to null rather than reading an undefined index.
         $items[$key]['views'] = isset($apiEntry['yt$statistics']['viewCount'])
             ? $apiEntry['yt$statistics']['viewCount']
             : null;
         $items[$key]['likes'] = isset($apiEntry['yt$rating']['numLikes'])
             ? $apiEntry['yt$rating']['numLikes']
             : null;
         $items[$key]['duration'] = isset($apiEntry['media$group']['yt$duration']['seconds'])
             ? $apiEntry['media$group']['yt$duration']['seconds']
             : null;

         $items[$key]['image_thumb'] = 'http://i.ytimg.com/vi/' . $id . '/default.jpg';
         $items[$key]['image_medium'] = 'http://i.ytimg.com/vi/' . $id . '/mqdefault.jpg';
         $items[$key]['image_large'] = 'http://i.ytimg.com/vi/' . $id . '/hqdefault.jpg';

         // Keep the non-empty top-level keys of the raw response on the item.
         foreach ($info as $k => $v) {
             if ($v != '') {
                 $items[$key][$k] = $v;
             }
         }
     }

     $this->feed['items'] = $items;
 }
Ejemplo n.º 4
0
 /**
  * Fetch the page at the current URL and collect the requested tags.
  *
  * Requests $this->url, records the response content type and, for a 200
  * text/html response, loads the body into a DOMDocument. For every tag
  * name in $tags the matching elements are collected into $this->elements.
  * Anchors whose resolved href starts with $baseUrl, is not already listed
  * in $this->children and differs from $this->url are appended to
  * $this->children.
  *
  * @param  string $baseUrl base URL used to resolve relative links and to filter children
  * @param  mixed  $context passed through as the second argument of Response::parse()
  * @param  array  $tags    tag names to collect
  * @return array  the collected elements ($this->elements)
  */
 public function parse($baseUrl, $context, array $tags)
 {
     $dom = null;
     $this->response = Response::parse($this->url, $context);

     // The header may be stored under either capitalisation.
     if (null !== $this->response->getHeader('Content-type')) {
         $this->contentType = $this->response->getHeader('Content-type');
     } else {
         if (null !== $this->response->getHeader('Content-Type')) {
             $this->contentType = $this->response->getHeader('Content-Type');
         }
     }

     if (null !== $this->contentType && stripos($this->contentType, 'text/html') !== false) {
         if ($this->response->getCode() == 200) {
             // Silence markup warnings while loading; error_reporting()
             // returns the previous level as an integer for the restore.
             $oldError = error_reporting(0);
             $dom = new \DOMDocument();
             $dom->recover = true;
             $dom->strictErrorChecking = false;
             // loadHTML() rejects an empty string (ValueError on PHP 8), so an
             // empty body simply leaves the document empty.
             $body = (string) $this->response->getBody();
             if ($body !== '') {
                 $dom->loadHTML($body);
             }
             error_reporting($oldError);
         }
     }

     if (null !== $dom) {
         foreach ($tags as $tag) {
             switch ($tag) {
                 case 'title':
                     $title = $dom->getElementsByTagName('title');
                     $this->elements['title'] = null !== $title->item(0) ? trim($title->item(0)->nodeValue) : null;
                     break;
                 case 'meta':
                     // Only meta tags carrying both "name" and "content" are kept.
                     $meta = $dom->getElementsByTagName('meta');
                     if (null !== $meta->item(0)) {
                         foreach ($meta as $m) {
                             if ($m->hasAttribute('name') && $m->hasAttribute('content')) {
                                 if (!isset($this->elements['meta'])) {
                                     $this->elements['meta'] = [];
                                 }
                                 $this->elements['meta'][] = ['name' => $m->getAttribute('name'), 'content' => $m->getAttribute('content')];
                             }
                         }
                     }
                     break;
                 case 'a':
                     $anchors = $dom->getElementsByTagName('a');
                     if (null !== $anchors->item(0)) {
                         foreach ($anchors as $a) {
                             if (!isset($this->elements['a'])) {
                                 $this->elements['a'] = [];
                             }
                             $href = $a->hasAttribute('href') ? $a->getAttribute('href') : null;
                             if (null !== $href && $this->isValidHref($href)) {
                                 // Reduce an absolute in-site link to its path part.
                                 if (substr($href, 0, strlen($baseUrl)) == $baseUrl) {
                                     $href = substr($href, strlen($baseUrl));
                                 }
                                 // Path of the current page relative to the base URL.
                                 $url = substr($this->url, strlen($baseUrl));
                                 if (substr($href, 0, 1) == '/') {
                                     // Root-relative link.
                                     $href = $baseUrl . $href;
                                 } else {
                                     if (substr($href, 0, 2) == './') {
                                         // Link relative to the current page path.
                                         $href = $baseUrl . $url . substr($href, 1);
                                     } else {
                                         if (strpos($href, '../') !== false) {
                                             // Climb one path segment per "../", or
                                             // fall back to the base URL when the link
                                             // climbs at least as deep as the page is.
                                             $depth = substr_count($url, '/');
                                             $levels = substr_count($href, '../');
                                             if ($depth > $levels) {
                                                 for ($i = 0; $i < $levels; $i++) {
                                                     $url = substr($url, 0, strrpos($url, '/'));
                                                 }
                                                 $href = $baseUrl . $url . '/' . str_replace('../', '', $href);
                                             } else {
                                                 $href = $baseUrl . '/' . str_replace('../', '', $href);
                                             }
                                         }
                                     }
                                 }
                                 // Queue new in-site links other than the current page.
                                 if (substr($href, 0, strlen($baseUrl)) == $baseUrl && !in_array($href, $this->children) && $this->url != $href) {
                                     $this->children[] = $href;
                                 }
                             }
                             // Use the anchor text, or "[image]" for an image-only anchor.
                             if ($a->nodeValue != '') {
                                 $value = $a->nodeValue;
                             } else {
                                 $imgs = $a->getElementsByTagName('img');
                                 $value = null !== $imgs->item(0) ? '[image]' : null;
                             }
                             $this->elements['a'][] = ['href' => $href, 'value' => $value, 'title' => $a->hasAttribute('title') ? $a->getAttribute('title') : null, 'name' => $a->hasAttribute('name') ? $a->getAttribute('name') : null, 'rel' => $a->hasAttribute('rel') ? $a->getAttribute('rel') : null];
                         }
                     }
                     break;
                 case 'img':
                     $images = $dom->getElementsByTagName('img');
                     if (null !== $images->item(0)) {
                         foreach ($images as $image) {
                             if (!isset($this->elements['img'])) {
                                 $this->elements['img'] = [];
                             }
                             $this->elements['img'][] = ['src' => $image->hasAttribute('src') ? $image->getAttribute('src') : null, 'alt' => $image->hasAttribute('alt') ? $image->getAttribute('alt') : null, 'title' => $image->hasAttribute('title') ? $image->getAttribute('title') : null];
                         }
                     }
                     break;
                 default:
                     // Any other tag: collect the node value of each match.
                     $element = $dom->getElementsByTagName($tag);
                     if (null !== $element->item(0)) {
                         foreach ($element as $e) {
                             $this->elements[$tag][] = $e->nodeValue;
                         }
                     }
             }
         }
     }

     return $this->elements;
 }
Ejemplo n.º 5
0
 /**
  * Exercise Response::parse() against a live URL and a raw response string.
  *
  * The URL is parsed once without a context and once with a header context;
  * assertions run against the second result. A Response is then built by
  * hand, rendered to a string and parsed back.
  */
 public function testParse()
 {
     $versionUrl = 'http://www.popphp.org/version';

     // The context-less call only needs to complete; its result is replaced.
     $remote = Response::parse($versionUrl);
     $remote = Response::parse($versionUrl, ['header' => "Accept-language: en\r\n"]);

     $this->assertEquals('200', $remote->getCode());
     $this->assertEquals('OK', $remote->getMessage());
     $this->assertEquals('1.7.0', trim($remote->getBody()));
     $this->assertEquals('text/plain', $remote->getHeader('Content-Type'));
     $this->assertTrue($remote->isSuccessful());
     $this->assertTrue(is_array($remote->getHeaders()));
     $this->assertFalse($remote->isError());
     $this->assertFalse($remote->isRedirect());

     // Round trip: build a response, render it to text, parse it back.
     $built = new Response(200, ['Content-Type' => 'text/plain']);
     $built->setBody('This is a test.');
     $rawResponse = $built->getHeadersAsString() . PHP_EOL . $built->getBody();

     $reparsed = Response::parse($rawResponse);
     $this->assertEquals('200', $reparsed->getCode());
     $this->assertEquals('OK', $reparsed->getMessage());
     $this->assertEquals('This is a test.', trim($reparsed->getBody()));
 }
Ejemplo n.º 6
0
 /**
  * Constructor
  *
  * Splits the URL into a schema and a base directory URL, requests the URL,
  * records the redirect/error flags, status code, body length and content
  * type of the response and, for an HTML response, loads the body into a
  * DOMDocument and parses the requested elements.
  *
  * @param  string $url      URL of the document to fetch
  * @param  array  $elements passed through to parseElements()
  */
 public function __construct($url, array $elements = null)
 {
     $this->url = $url;

     // The schema is everything up to and including "//". A URL without "//"
     // has no schema; previously strpos() === false produced a bogus
     // two-character schema.
     $schemaEnd = strpos($this->url, '//');
     $this->schema = (false !== $schemaEnd) ? substr($this->url, 0, $schemaEnd + 2) : '';

     // Strip the schema from the front only. str_replace() would also remove
     // later occurrences, e.g. a URL embedded in the query string.
     $this->base = (string) substr($this->url, strlen($this->schema));
     if (substr($this->base, -1) == '/') {
         $this->base = substr($this->base, 0, -1);
     }

     // When the last path segment contains a dot (e.g. "index.html"), drop it
     // so the base refers to the containing directory.
     $lastSlash = strrpos($this->base, '/');
     if (false !== $lastSlash && strpos(substr($this->base, $lastSlash + 1), '.') !== false) {
         $this->base = substr($this->base, 0, $lastSlash + 1);
     }

     $this->base = $this->schema . $this->base;
     if (substr($this->base, -1) != '/') {
         $this->base .= '/';
     }

     // Forward the caller's user agent when there is one, else a fixed default.
     $ua = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0';
     $opts = ['method' => 'GET', 'header' => "Accept-language: en\r\n" . "User-Agent: " . $ua . "\r\n", 'user_agent' => $ua];

     $response = \Pop\Http\Response::parse($this->url, $opts);
     $this->redirect = $response->isRedirect();
     $this->error = $response->isError();
     $this->code = $response->getCode();
     $this->contentLength = strlen($response->getBody());

     if (!$this->error) {
         // Get content type; the header may use either capitalisation
         if (null !== $response->getHeader('Content-type')) {
             $this->contentType = $response->getHeader('Content-type');
         } else {
             if (null !== $response->getHeader('Content-Type')) {
                 $this->contentType = $response->getHeader('Content-Type');
             }
         }

         // If an HTML page, parse it. The null check avoids passing null to
         // stripos() when the response carried no content type.
         if (null !== $this->contentType && stripos($this->contentType, 'text/html') !== false) {
             // Silence markup warnings while loading; error_reporting()
             // returns the previous level as an integer for the restore.
             $oldError = error_reporting(0);
             $this->dom = new \DOMDocument();
             $this->dom->recover = true;
             $this->dom->strictErrorChecking = false;
             // loadHTML() rejects an empty string (ValueError on PHP 8), so an
             // empty body simply leaves the document empty.
             $body = (string) $response->getBody();
             if ($body !== '') {
                 $this->dom->loadHTML($body);
             }
             error_reporting($oldError);
             $this->parseElements($elements);
         }
     }
 }