/**
 * Extract media URLs from a blog HTML content.
 *
 * The content is scanned in two passes:
 *  1. The `src` attribute of every <source>, <iframe>, <embed>, <param> and
 *     <img> element is collected when FeedValidator::isValidURL() accepts it.
 *     The entry name is taken from the element's `title` attribute, else its
 *     `alt` attribute, else "<media type> - <match index>". Because the tag
 *     is lower-cased before the attributes are read, names taken from
 *     `title`/`alt` are lower-cased as well.
 *  2. The text returned by FeedValidator::getOnlyText() is split on spaces
 *     and every word accepted by both FeedValidator::isValidURL() and
 *     FeedValidator::isValidMediaURL() (tried for 'image', 'sound', 'video',
 *     in that order) is added, unless that URL was already collected.
 *
 * @access public
 * @param String $text HTML content that may contain media elements.
 * @return Array List of entries, each an array with the keys 'uri', 'type'
 *               and 'name'. Empty when $text is NULL or blank.
 */
public static function getMediaURLfromHTML($text = NULL)
{
    $media_ = array();

    // Cast first: calling trim() on NULL is deprecated since PHP 8.1.
    $text = (string) $text;
    if (trim($text) === '') {
        return $media_;
    }

    // URI => true lookup used to skip URLs that were already collected.
    $seen_uris = array();

    // Pass 1: media elements carrying a src attribute.
    // - "\b" keeps the tag name exact.
    // - "\s" before "src" prevents matching attributes such as "data-src".
    // - "[^>]*>" stops at the end of the current tag; a greedy ".*>" would
    //   run to the last ">" of the text and swallow every following tag.
    $pattern = '/<(source|iframe|embed|param|img)\b[^>]*?\ssrc=[\'"]([^\'"]+)[\'"][^>]*>/i';
    $flat_html = str_replace('><', '> <', FeedValidator::removeBreaklines($text, ' '));
    $found = preg_match_all($pattern, $flat_html, $matches);

    if ($found > 0 && count($matches[2]) > 0) {
        foreach ($matches[2] as $i => $value) {
            if (!FeedValidator::isValidURL($value)) {
                continue;
            }

            // Normalise quotes and case so title="..." / alt="..." can be
            // located with a plain string split.
            $simple_tag = str_replace("'", "\"", strtolower(stripcslashes($matches[0][$i])));
            $media_type = FeedValidator::getMediaType($value);

            $title_parts = explode('title="', $simple_tag);
            $alt_parts = explode('alt="', $simple_tag);

            if (count($title_parts) > 1) {
                $pieces = explode('"', $title_parts[1]);
                $name = $pieces[0];
            } elseif (count($alt_parts) > 1) {
                $pieces = explode('"', $alt_parts[1]);
                $name = $pieces[0];
            } else {
                $name = $media_type . " - " . $i;
            }

            $media_[] = array('uri' => $value, 'type' => $media_type, 'name' => $name);
            $seen_uris[$value] = true;
        }
    }

    // Pass 2: strip the HTML and analyze each individual word to find URLs
    // written in the text itself (CF: MediaWiki content).
    $text_split = explode(' ', FeedValidator::getOnlyText($text, self::CHARSET));
    foreach ($text_split as $value) {
        // The URL check does not depend on the media type, so it is done
        // once per word; already collected URLs are skipped.
        if (isset($seen_uris[$value]) || !FeedValidator::isValidURL($value)) {
            continue;
        }

        foreach (array('image', 'sound', 'video') as $media_type) {
            if (FeedValidator::isValidMediaURL($value, $media_type)) {
                $media_[] = array('uri' => $value, 'type' => $media_type, 'name' => $media_type);
                $seen_uris[$value] = true;
                // First matching type wins; the URL is now recorded.
                break;
            }
        }
    }

    return $media_;
}