/**
 * Collect the <meta> tag values and the title of a remote page.
 *
 * The result always contains the keys 'title' (defaulting to the URI itself)
 * and 'description' (defaulting to an empty string); any meta tags found on
 * the page are merged over those defaults. When the page has no "title" meta
 * tag, the page is downloaded with cURL and the first <title> element is used.
 *
 * @param string $uri Absolute URI of the page to inspect
 * @return array|false Meta data array, or false when $uri is not a valid absolute URI
 */
public function getMetaData($uri)
{
    $validator = new \Zend\Validator\Uri(array('allowRelative' => false));
    if (!$validator->isValid($uri)) {
        return false;
    }

    $return = array('title' => $uri, 'description' => '');

    // get_meta_tags() returns false when the page cannot be read; normalise
    // that to an empty array so the merges below always operate on arrays.
    $metaData = get_meta_tags($uri);
    if (!is_array($metaData)) {
        $metaData = array();
    }

    if (!array_key_exists('title', $metaData)) {
        $data = false;
        $ch = curl_init();
        if ($ch !== false) {
            curl_setopt($ch, CURLOPT_HEADER, 0);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_URL, $uri);
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
            $data = curl_exec($ch);
            curl_close($ch);
        }

        // Only parse when the download produced a non-empty body:
        // curl_exec() yields false on failure and loadHTML() rejects ''.
        if (is_string($data) && $data !== '') {
            $doc = new \DOMDocument();

            // Collect markup errors internally instead of silencing them with @.
            $previous = libxml_use_internal_errors(true);
            $doc->loadHTML($data);
            libxml_clear_errors();
            libxml_use_internal_errors($previous);

            $nodes = $doc->getElementsByTagName('title');
            if (isset($nodes->item(0)->nodeValue)) {
                $metaData['title'] = $nodes->item(0)->nodeValue;
            }
        }
    }

    return array_merge($return, $metaData);
}
/**
 * Get a list of thumbnail URLs for a URI.
 *
 * YouTube, Dailymotion and Vimeo links are mapped to the thumbnail URLs of
 * the video; for any other host the page is fetched and the src values of
 * its <img> tags are returned, with relative values made absolute.
 *
 * @param string  $uri   Absolute URI to inspect
 * @param integer $limit Maximum number of thumbnails to return
 * @return array List of thumbnail URLs; empty when $uri is invalid or nothing was found
 */
public function getThumbs($uri, $limit = 5)
{
    $validator = new \Zend\Validator\Uri(array('allowRelative' => false));
    $return = array();

    if ($validator->isValid($uri)) {
        $parseInfo = parent::parse($uri);

        switch ($parseInfo->host) {
            // Youtube.com
            case "www.youtube.com":
                $queryArray = array();
                // Cast: the query component may be absent from the URI.
                parse_str((string) $parseInfo->query, $queryArray);
                if (isset($queryArray['v'])) {
                    $return = array(
                        "http://img.youtube.com/vi/" . $queryArray['v'] . "/0.jpg",
                        "http://img.youtube.com/vi/" . $queryArray['v'] . "/1.jpg",
                        "http://img.youtube.com/vi/" . $queryArray['v'] . "/2.jpg",
                        "http://img.youtube.com/vi/" . $queryArray['v'] . "/3.jpg"
                    );
                }
                break;

            // Dailymotion.com
            case "www.dailymotion.com":
                if (strpos($parseInfo->path, "/video") !== false) {
                    $return = array('http://www.dailymotion.com/thumbnail' . $parseInfo->path);
                }
                break;

            // Vimeo.com
            case "vimeo.com":
                $id = str_replace("/", "", $parseInfo->path);
                $json = file_get_contents("http://vimeo.com/api/v2/video/{$id}.json");
                // file_get_contents() returns false when the request fails;
                // in that case leave the result empty instead of decoding it.
                if ($json !== false) {
                    $data = \Zend\Json\Json::decode($json);
                    if (is_array($data) && isset($data[0]->thumbnail_medium)) {
                        $return = array($data[0]->thumbnail_medium);
                    }
                }
                break;

            // others webpage
            default:
                /**
                 * Credit to http://www.bitrepository.com
                 * http://www.bitrepository.com/extract-images-from-an-url.html
                 */
                // Fetch page (fetchPage() is defined outside this block;
                // presumably it returns the page HTML - TODO confirm)
                $string = (string) $this->fetchPage($uri);
                $out = array();

                // Regex for SRC Value
                $image_regex_src_url = '/<img[^>]*' . 'src=[\\"|\'](.*)[\\"|\']/Ui';
                preg_match_all($image_regex_src_url, $string, $out, PREG_PATTERN_ORDER);
                $return = $out[1];

                // Pieces needed to turn relative image references into absolute URLs.
                $scheme = $parseInfo->scheme;
                $origin = $scheme . "://" . $parseInfo->host;
                $pagePath = (string) $parseInfo->path;
                $lastSlash = strrpos($pagePath, '/');
                $baseDir = ($lastSlash === false) ? '/' : substr($pagePath, 0, $lastSlash + 1);

                foreach ($return as $i => $src) {
                    $tUri = new Uri();
                    $parseInfoThumb = $tUri->parse($src);
                    if ($parseInfoThumb->isAbsolute()) {
                        continue;
                    }

                    if (strpos($src, '//') === 0) {
                        // Protocol-relative reference: only the scheme is missing.
                        $return[$i] = $scheme . ':' . $src;
                    } elseif (strpos($src, '/') === 0) {
                        // Root-relative reference.
                        $return[$i] = $origin . $src;
                    } else {
                        // Document-relative reference: resolve against the
                        // directory of the page so host and path do not run together.
                        $return[$i] = $origin . $baseDir . $src;
                    }
                }
        }
    }

    // check && return
    return array_slice($return, 0, $limit);
}
/**
 * Classify a URI as a known video link, an image, a Flash file or a web page.
 *
 * Known video hosts are recognised from the URI alone. For anything else a
 * header-only cURL request is issued and the reported Content-Type decides
 * between image, SWF and generic web page.
 *
 * @param string $uri Absolute URI to classify
 * @return string One of the self::TYPE_* constants, or "" when $uri is not a valid absolute URI
 */
public function getType($uri)
{
    $validator = new \Zend\Validator\Uri(array('allowRelative' => false));
    if (!$validator->isValid($uri)) {
        return "";
    }

    $parseInfo = parent::parse($uri);

    switch ($parseInfo->host) {
        // Youtube.com
        case "www.youtube.com":
            $queryArray = array();
            // Cast: the query component may be absent from the URI.
            parse_str((string) $parseInfo->query, $queryArray);
            if (isset($queryArray['v'])) {
                return self::TYPE_VIDEO_YOUTUBE;
            }
            break;

        // Dailymotion.com
        case "www.dailymotion.com":
            if (strpos($parseInfo->path, "/video") !== false) {
                return self::TYPE_VIDEO_DAILYMOTION;
            }
            break;

        // Vimeo.com
        case "vimeo.com":
            // The path is a string such as "/12345", so is_int() could never
            // match; test for an all-digit path segment instead.
            if (preg_match('#^/\d+/?$#', (string) $parseInfo->path) === 1) {
                return self::TYPE_VIDEO_VIMEO;
            }
            break;
    }

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $uri);
    curl_setopt($ch, CURLOPT_HEADER, true);
    curl_setopt($ch, CURLOPT_NOBODY, true); // remove body
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_exec($ch);
    $contentType = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
    curl_close($ch);

    // curl_getinfo() gives a non-string when no Content-Type was received;
    // treat that as a plain web page.
    if (is_string($contentType)) {
        if (strpos($contentType, "image") !== false) {
            return self::TYPE_IMAGE;
        }
        if ($contentType == "application/x-shockwave-flash") {
            return self::TYPE_SWF;
        }
    }

    return self::TYPE_WEBPAGE;
}
/**
 * Check that a URL is well-formed, has a valid DNS host name and that
 * requesting it yields a status line containing "200".
 *
 * Note: this performs a live HTTP request through get_headers().
 *
 * @param string $url URL to check
 * @return bool True when every check passes, false otherwise
 */
public function isValidUrl($url)
{
    $uri = new \Zend\Validator\Uri();
    if (!$uri->isValid($url)) {
        return false;
    }

    // parse_url() may return false or omit 'host'; empty() covers both.
    $parseUrl = parse_url($url);
    if (empty($parseUrl['host'])) {
        return false;
    }

    $validator = new \Zend\Validator\Hostname(\Zend\Validator\Hostname::ALLOW_DNS);
    if (!$validator->isValid($parseUrl['host'])) {
        return false;
    }

    // The previous form, "!filter_var(host, ...) === false", negated before
    // comparing and was applied to the bare host, so it never rejected
    // anything. Validate the complete URL instead.
    if (filter_var($url, FILTER_VALIDATE_URL) === false) {
        return false;
    }

    // get_headers() returns false when the request fails.
    $headers = get_headers($url);
    if (!is_array($headers) || !isset($headers[0])) {
        return false;
    }

    // The first header line is the HTTP status line of the first response.
    return strpos($headers[0], '200') !== false;
}
/**
 * Resolve an aggregator link to the URL it points at.
 *
 * For hosts containing "google" the target is read from the "q" or "url"
 * query parameter ("url" wins when both are present); for hosts containing
 * "bing" it is read from "url". Any other link is returned unchanged. The
 * result is validated before being returned.
 *
 * @param string $link Link to resolve
 * @return string|null The resolved link, or null when no valid URI could be determined
 */
private function get_link($link)
{
    // Google & Bing news are collections from multiple sources.
    // We keep the description but set the link to final url.
    $linkparts = parse_url($link);

    // parse_url() returns false for seriously malformed input and omits
    // components that are absent, so read them defensively.
    $host = (is_array($linkparts) && isset($linkparts['host'])) ? $linkparts['host'] : '';
    $hasQuery = is_array($linkparts) && isset($linkparts['query']);

    // Yahoo adds some tracking at the end. Lets remove it
    // if (preg_match ( '/yahoo/', $linkparts ['host'] )) {
    //     $link = explode ( ';', $link );
    //     $link = $link [0];
    //     if (isset ( $linkparts ['path'] )) {
    //         $path = preg_match ( "/RU=.*\/RK/", $linkparts ['path'], $matches );
    //         $matches = $matches [0];
    //         $matches = substr ( $matches, 3 );
    //         $matches = substr ( $matches, 0, - 3 );
    //         $validator = new \Zend\Validator\Uri ();
    //         if ($validator->isValid ( $matches )) {
    //             $link = $matches;
    //         }
    //     }
    // }

    // Google news are collections from multiple sources.
    // We keep the description but set the link to final url.
    if (preg_match('/google/', $host)) {
        if ($hasQuery) {
            parse_str($linkparts['query'], $query);
            $link = null;
            if (isset($query['q'])) {
                $link = $query['q'];
            }
            if (isset($query['url'])) {
                $link = $query['url'];
            }
        }
    }

    // Bing news are collections from multiple sources.
    // We keep the description but set the link to final url.
    if (preg_match('/bing/', $host)) {
        if ($hasQuery) {
            parse_str($linkparts['query'], $query);
            if (isset($query['url'])) {
                $link = $query['url'];
            } else {
                $link = null;
            }
        }
    }

    $validator = new \Zend\Validator\Uri();
    if (!$validator->isValid($link)) {
        return null;
    }

    return $link;
}