/**
 * Fetches the images embedded in the page behind the given url.
 *
 * The page is downloaded (the url is passed through urldecode() first), the
 * image references are extracted with self::detect(), each reference is run
 * through UrlUtils::abslink() together with $url (presumably to make it
 * absolute) and then measured with getimagesize(). Images that cannot be
 * measured, or whose width + height is below 10, are skipped. The remaining
 * entries are ordered with the ImageParser::sort comparator and cut down to
 * at most $limit entries.
 *
 * @param string  $url   address of the page to scan
 * @param int     $limit maximum number of images to return
 * @param boolean $flat  true: return a plain list of image urls,
 *                       false: return array('size' => width + height, 'image' => url) entries
 * @return array
 */
public static function fetch($url, $limit = 5, $flat = true)
{
    // get the html as string
    $html = UrlUtils::getUrlContent(urldecode($url), 'GET');
    $images = self::detect($html);

    $result = array();
    $seen = array();

    foreach ($images as $image) {
        $image = UrlUtils::abslink($image, $url);

        // every measurement is a remote request, so never measure the same url twice
        if (isset($seen[$image])) {
            continue;
        }
        $seen[$image] = true;

        // getimagesize() returns false for unreachable or non-image resources;
        // the warning is suppressed on purpose because this is a best-effort scan
        $dimensions = @getimagesize($image);
        if ($dimensions === false) {
            continue;
        }

        $size = $dimensions[0] + $dimensions[1];

        // ignore tiny (stats/tracking) images: width + height must be at least 10
        if ($size >= 10) {
            $result[] = array('size' => $size, 'image' => $image);
        }
    }

    usort($result, array("ImageParser", "sort"));

    // keep at most $limit entries (never a negative length, which would cut from the end)
    $result = array_slice($result, 0, max(0, (int) $limit));

    // return only the image urls and drop the size
    if ($flat) {
        $urls = array();
        foreach ($result as $entry) {
            $urls[] = $entry['image'];
        }
        $result = $urls;
    }

    return $result;
}
/**
 * Parse a given html for meta-, title- and link-tags
 *
 * If the markup carries no http-equiv Content-Type declaration, a utf-8
 * declaration is injected into the head before the document is loaded.
 *
 * @param string $pHtml html source to parse
 * @param string $pUrl  url handed to UrlUtils::abslink() together with every link href
 * @return array $lValues with the keys (each only present when something was found)
 *               'meta'  => array(lowercased name attribute => content attribute),
 *               'title' => text of the first non-empty title element,
 *               'links' => array(rel attribute => result of UrlUtils::abslink(href, $pUrl))
 */
public static function parse($pHtml, $pUrl)
{
    $lValues = array();

    // nothing to parse; DOMDocument::loadHTML() warns (PHP < 8) or throws (PHP 8) on empty input
    if (!is_string($pHtml) || trim($pHtml) === '') {
        return $lValues;
    }

    // case-insensitive, so an upper-case <META HTTP-EQUIV=...> is recognised as well
    // and no second, conflicting declaration gets injected
    if (!preg_match('~<meta.*http-equiv\s*=\s*["\']\s*Content-Type\s*["\'].*/?>~i', $pHtml)) {
        $pHtml = preg_replace('/<head[^>]*>/i', '<head> <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8"> ', $pHtml);
    }

    try {
        // suppress html-validation-warnings
        libxml_use_internal_errors(true);

        $lDoc = new DOMDocument();
        $lDoc->loadHTML($pHtml);

        // the collected validation errors are never inspected; drop them so the buffer does not grow
        libxml_clear_errors();

        // meta-elements: one entry per name attribute, key = lowercased name, value = content
        foreach ($lDoc->getElementsByTagName('meta') as $lTag) {
            if ($lTag->hasAttribute('name')) {
                $lName = strtolower($lTag->getAttribute('name'));
                $lValues['meta'][$lName] = $lTag->getAttribute('content');
            }
        }

        // title-elements: take the first non-empty one and stop, so that later
        // title elements in the document cannot overwrite it
        foreach ($lDoc->getElementsByTagName('title') as $lMetaTitle) {
            $lTitle = $lMetaTitle->nodeValue;
            if ($lTitle) {
                $lValues['title'] = $lTitle;
                break;
            }
        }

        // link-elements: one entry per rel attribute, key = rel, value = href run through abslink
        foreach ($lDoc->getElementsByTagName('link') as $lLink) {
            if ($lLink->hasAttribute('rel')) {
                $lName = $lLink->getAttribute('rel');
                $lValues['links'][$lName] = UrlUtils::abslink($lLink->getAttribute('href'), $pUrl);
            }
        }
    } catch (Exception $e) {
        // best effort: fall through and hand back whatever was collected before the failure
    }

    return $lValues;
}