예제 #1
0
 /**
  * Fetches the images embedded in the page behind the given url.
  *
  * The page is downloaded, image references are extracted with self::detect(),
  * each reference is passed through UrlUtils::abslink() together with $url and
  * measured with getimagesize(). Images that cannot be measured or whose
  * width + height is below 10 are dropped. The remainder is ordered with
  * ImageParser::sort() and cut down to at most $limit entries.
  *
  * @param string $url   address of the page to scan (url-decoded before it is fetched)
  * @param int    $limit maximum number of images to return
  * @param bool   $flat  true to return a plain list of image urls, false to return
  *                      entries of the form array('size' => int, 'image' => string),
  *                      where size is width + height
  * @return array
  */
 public static function fetch($url, $limit = 5, $flat = true)
 {
     // get the html as string
     $html = UrlUtils::getUrlContent(urldecode($url), 'GET');
     $images = self::detect($html);
     $result = array();
     foreach ($images as $image) {
         $image = UrlUtils::abslink($image, $url);
         // getimagesize() yields false for unreachable or non-image resources;
         // skip those instead of indexing into a boolean
         $dimensions = @getimagesize($image);
         if ($dimensions === false) {
             continue;
         }
         $size = $dimensions[0] + $dimensions[1];
         // ignore all (stats)images smaller than roughly 5x5
         if ($size >= 10) {
             $result[] = array('size' => $size, 'image' => $image);
         }
     }
     usort($result, array("ImageParser", "sort"));
     // keep exactly $limit entries; a negative length would make array_slice()
     // count from the end of the list, so clamp it at zero
     $result = array_slice($result, 0, max(0, (int) $limit));
     // return only the images and crop the size
     if ($flat) {
         $urls = array();
         foreach ($result as $entry) {
             $urls[] = $entry['image'];
         }
         $result = $urls;
     }
     return $result;
 }
예제 #2
0
 /**
  * Parses a given html document for meta tags, the title and link tags.
  *
  * The returned array only contains the keys for which something was found:
  *  - 'meta':  map of lower-cased meta name => content attribute
  *  - 'title': text of the first non-empty title element
  *  - 'links': map of rel attribute => href passed through UrlUtils::abslink() with $pUrl
  *
  * @param string $pHtml html source to parse
  * @param string $pUrl  url handed to UrlUtils::abslink() for every link href
  * @return array $lValues the collected values; empty if the html is empty or parsing failed
  */
 public static function parse($pHtml, $pUrl)
 {
     // DOMDocument::loadHTML() rejects an empty source, so there is nothing to parse
     if (trim((string) $pHtml) === '') {
         return array();
     }
     // inject an utf-8 content-type declaration unless the document brings its own
     // (matched case-insensitively, html attribute names and values vary in case)
     if (!preg_match("~<meta.*http-equiv\\s*=\\s*(\"|\\')\\s*Content-Type\\s*(\"|\\').*\\/?>~i", $pHtml)) {
         // only touch the first real <head> tag; html5 <header> tags must stay intact
         $lPatched = preg_replace('/<head(?:\s[^>]*)?>/i', '<head>
                          <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
                         ', $pHtml, 1);
         // preg_replace() returns null on a pcre error; keep the untouched html then
         if ($lPatched !== null) {
             $pHtml = $lPatched;
         }
     }
     try {
         $lValues = array();
         //supress html-validation-warnings
         libxml_use_internal_errors(true);
         $lDoc = new DOMDocument();
         $lDoc->loadHTML($pHtml);
         // drop the collected validation errors so the libxml buffer does not grow per call
         libxml_clear_errors();
         //get all meta-elements
         $lTags = $lDoc->getElementsByTagName('meta');
         //loop the metas
         foreach ($lTags as $lTag) {
             //if attribute name isset make a new entry in an array with key=name and value=content
             if ($lTag->hasAttribute('name')) {
                 $lName = strtolower($lTag->getAttribute('name'));
                 $lValues['meta'][$lName] = $lTag->getAttribute('content');
             }
         }
         //get all title elements
         $lTitles = $lDoc->getElementsByTagName('title');
         //loop the titles
         foreach ($lTitles as $lMetaTitle) {
             $lTitle = $lMetaTitle->nodeValue;
             //save the first non-empty title with key=title and stop looking
             if ($lTitle) {
                 $lValues['title'] = $lTitle;
                 break;
             }
         }
         //get all link-elements
         $lLinks = $lDoc->getElementsByTagName('link');
         //loop the links
         foreach ($lLinks as $lLink) {
             //if attribute rel isset make a new entry in an array with key=rel and value=href
             if ($lLink->hasAttribute('rel')) {
                 $lName = $lLink->getAttribute('rel');
                 $lValues['links'][$lName] = UrlUtils::abslink($lLink->getAttribute('href'), $pUrl);
             }
         }
         return $lValues;
     } catch (Exception $e) {
         // parsing is best effort: report "nothing found" instead of failing the caller
         return array();
     }
 }