Ejemplo n.º 1
0
 /**
  * Downloads a page and extracts link-preview data from its HTML.
  *
  * The markup is scanned with regular expressions (no DOM parser) for the
  * declared character set, the <title>, the "description" meta tag and the
  * src of every <img> tag. Title and description are converted to UTF-8.
  * Image sources without a host are turned into absolute URLs based on $url.
  *
  * @param string $url Address of the page to fetch.
  *
  * @return array Keys: 'type' (always 'link'), 'description' and 'title'
  *               (each passed through UTIL_HtmlTag::escapeHtml, built from null
  *               when the page has none), 'thumbnail_url' (first image URL or
  *               null) and 'allImages' (list of image URLs).
  */
 private function parsePage($url)
 {
     // Best-effort fetch: error suppression is kept so that a failed request
     // still yields an (empty) preview instead of surfacing a warning.
     $content = @UTIL_HttpResource::getContents($url);
     if ($content === false || $content === null) {
         // Never hand a non-string subject to preg_* (deprecated since PHP 8.1).
         $content = '';
     }

     // --- Character set declared by the page -------------------------------
     $encoding = 'UTF-8';
     $tag = array();
     $found = array();
     if (preg_match('/<\\s*meta\\s*[^\\>]*?http-equiv=[\'"]content-type[\'"][^\\>]*?\\s*>/i', $content, $tag)
         && preg_match('/content=[\'"][^\'"]*?charset=([\\w-]+)(?:[^\\w-][^\'"]*)?[\'"]/i', $tag[0], $found)
     ) {
         // <meta http-equiv="content-type" content="text/html; charset=...">
         $encoding = $found[1];
     } elseif (preg_match('/<\\s*meta\\s+charset\\s*=\\s*[\'"]?([\\w-]+)/i', $content, $found)) {
         // HTML5 short form: <meta charset="...">
         $encoding = $found[1];
     }

     // The label comes from the remote page, so it may be unknown to mbstring.
     // PHP 8 throws ValueError for that, older versions warn and return false;
     // in both cases fall back to UTF-8.
     try {
         $encodingKnown = @mb_convert_encoding('a', 'UTF-8', $encoding) !== false;
     } catch (\ValueError $e) {
         $encodingKnown = false;
     }
     if (!$encodingKnown) {
         $encoding = 'UTF-8';
     }

     // --- Title ------------------------------------------------------------
     $title = null;
     if (preg_match('/<\\s*title\\s*>([\\s\\S]*?)<\\s*\\/\\s*title\\s*>/i', $content, $found)
         && !empty($found[1])
     ) {
         $title = mb_convert_encoding($found[1], 'UTF-8', $encoding);
     }

     // --- Description ------------------------------------------------------
     $description = null;
     // The closing quote must be the same character as the opening one, so an
     // apostrophe inside a double-quoted value no longer truncates the text.
     if (preg_match('/<\\s*meta\\s*[^\\>]*?name=[\'"]description[\'"][^\\>]*?\\s*>/i', $content, $tag)
         && preg_match('/content=([\'"])(.*?)\\1/is', $tag[0], $found)
         && !empty($found[2])
     ) {
         $description = mb_convert_encoding($found[2], 'UTF-8', $encoding);
     }

     // --- Base used to resolve relative image sources ------------------------
     $base = parse_url($url);
     if (!is_array($base)) {
         $base = array();
     }

     $origin = '';
     if (!empty($base['host'])) {
         $origin = (empty($base['scheme']) ? '' : $base['scheme'] . '://')
             . $base['host']
             . (isset($base['port']) ? ':' . $base['port'] : '');
     }

     $basePath = empty($base['path']) ? '/' : $base['path'];
     if (substr($basePath, -1) === '/') {
         $baseDir = $basePath;
     } else {
         $lastSlash = strrpos($basePath, '/');
         $lastSegment = $lastSlash === false ? $basePath : substr($basePath, $lastSlash + 1);
         if (strpos($lastSegment, '.') === false) {
             // Heuristic kept from the original code: a last segment without
             // an extension is treated as a directory name.
             $baseDir = $basePath . '/';
         } else {
             $baseDir = $lastSlash === false ? '/' : substr($basePath, 0, $lastSlash + 1);
         }
     }

     // --- Images -----------------------------------------------------------
     $images = array();
     $imgMatches = array();
     // [^>] keeps the match inside a single <img ...> tag (also across line
     // breaks); the look-behind skips attributes such as data-src.
     if (preg_match_all('/<\\s*img\\b[^>]*?(?<![\\w-])src\\s*=\\s*([\'"])(.+?)\\1[^>]*>/i', $content, $imgMatches)) {
         foreach ($imgMatches[2] as $img) {
             $imgInfo = parse_url($img);

             // Only host-less, scheme-less references are rewritten. Absolute
             // and protocol-relative URLs, sources carrying their own scheme
             // (e.g. data:) and unparsable values are passed through as-is,
             // as is everything when the page URL has no usable host.
             if ($origin !== '' && is_array($imgInfo) && empty($imgInfo['host']) && empty($imgInfo['scheme'])) {
                 $imgPath = isset($imgInfo['path']) ? $imgInfo['path'] : '';

                 if ($imgPath === '') {
                     // Query-only or fragment-only reference: same document path.
                     $resolved = $origin . $basePath;
                 } elseif ($imgPath[0] === '/') {
                     $resolved = $origin . $imgPath;
                 } else {
                     $resolved = $origin . $baseDir . $imgPath;
                 }

                 if (isset($imgInfo['query'])) {
                     $resolved .= '?' . $imgInfo['query'];
                 }
                 if (isset($imgInfo['fragment'])) {
                     $resolved .= '#' . $imgInfo['fragment'];
                 }

                 $img = $resolved;
             }

             $images[] = $img;
         }
     }

     $firstImg = isset($images[0]) ? $images[0] : null;

     return array(
         'type' => 'link',
         'description' => UTIL_HtmlTag::escapeHtml($description),
         'title' => UTIL_HtmlTag::escapeHtml($title),
         'thumbnail_url' => $firstImg,
         'allImages' => $images,
     );
 }