/**
 * Extract <iframe> elements from a HTML string.
 *
 * The contents of <code>...</code> blocks are blanked out before searching,
 * so iframes that only appear inside code samples are not reported.
 * IFrames whose 'src' attribute is missing or empty are skipped.
 *
 * Each returned item is the tag array produced by blcUtility::extract_tags()
 * with one extra key, 'embed_code', holding a copy of the item's 'full_tag'
 * value (the HTML of the <iframe> element).
 *
 * @uses blcUtility::extract_tags() This function is a thin wrapper around extract_tags()
 *
 * @param string $html
 * @return array
 */
function extract_embeds($html) {
	//Blank out <code></code> blocks so that their contents are not scanned.
	$html = preg_replace('/<code[^>]*>.+?<\\/code>/si', ' ', $html);

	//Collect every <iframe> that actually points somewhere.
	$found = array();
	foreach (blcUtility::extract_tags($html, 'iframe', false, true) as $iframe) {
		if (!empty($iframe['attributes']['src'])) {
			$iframe['embed_code'] = $iframe['full_tag'];
			$found[] = $iframe;
		}
	}

	return $found;
}
/**
 * Extract <embed> elements from a HTML string.
 *
 * Two passes are made over the input (after blanking out <code> blocks):
 *  1. <object> elements are scanned for a nested <embed>. When one with a
 *     non-empty 'src' is found, it is reported with 'embed_code' set to the
 *     HTML of the whole <object>, and that <object> is removed from the
 *     working copy of the input.
 *  2. The remaining HTML is scanned for stand-alone <embed> elements, which
 *     are reported with 'embed_code' set to their own HTML.
 *
 * Embeds with a missing or empty 'src' attribute are never reported.
 * Apart from the added 'embed_code' key, each item has the structure
 * returned by blcUtility::extract_tags().
 *
 * @uses blcUtility::extract_tags() This function is a thin wrapper around extract_tags()
 *
 * @param string $html
 * @return array
 */
function extract_embeds($html) {
	$found = array();

	//Blank out <code></code> blocks so that their contents are not scanned.
	$html = preg_replace('/<code[^>]*>.+?<\\/code>/si', ' ', $html);

	//Pass 1 : <embed> elements wrapped in an <object>.
	$wrappers = blcUtility::extract_tags($html, 'object', false, true);
	foreach ($wrappers as $wrapper) {
		$inner = blcUtility::extract_tags($wrapper['full_tag'], 'embed', true);
		if (!empty($inner)) {
			//Only the first <embed> inside the wrapper is considered.
			$embed = reset($inner);
			if (!empty($embed['attributes']['src'])) {
				$embed['embed_code'] = $wrapper['full_tag'];
				$found[] = $embed;

				//Drop the wrapper so the same embed isn't picked up again in pass 2.
				$html = str_replace($wrapper['full_tag'], ' ', $html);
			}
		}
	}

	//Pass 2 : stand-alone <embed> elements.
	$standalone = blcUtility::extract_tags($html, 'embed', false, true);
	foreach ($standalone as $embed) {
		if (empty($embed['attributes']['src'])) {
			continue;
		}
		$embed['embed_code'] = $embed['full_tag'];
		$found[] = $embed;
	}

	return $found;
}
/**
 * Check a YouTube video link by querying the YouTube API for the video.
 *
 * The video ID is taken from the 'v' query parameter of $url. The API
 * response code (and, for a 200 response, any <yt:state> tag in the body)
 * decides whether the link is reported as broken.
 *
 * The returned array always contains 'final_url', 'redirect_count',
 * 'timeout', 'broken', 'log', 'result_hash', 'request_duration' and
 * 'http_code'. 'status_text' and 'status_code' are set for the recognised
 * response codes (200, 400, 403, 404).
 *
 * @param string $url
 * @return array
 */
function check($url) {
	$result = array(
		'final_url' => $url,
		'redirect_count' => 0,
		'timeout' => false,
		'broken' => false,
		'log' => "<em>(Using YouTube API)</em>\n\n",
		'result_hash' => '',
	);

	//Extract the video ID from the URL. parse_url() returns false for
	//seriously malformed URLs, and the URL may lack a query string or a
	//"v" parameter, so fall back to an empty ID instead of raising notices.
	$components = @parse_url($url);
	$query = array();
	if (is_array($components) && isset($components['query'])) {
		parse_str($components['query'], $query);
	}
	$video_id = (isset($query['v']) && is_string($query['v'])) ? $query['v'] : '';

	//Fetch video data from the YouTube API.
	//parse_str() URL-decodes the ID, so re-encode it before placing it in the path.
	$api_url = 'http://gdata.youtube.com/feeds/api/videos/' . rawurlencode($video_id);
	$conf = blc_get_configuration();
	$args = array('timeout' => $conf->options['timeout']);

	$start = microtime_float();
	$response = wp_remote_get($api_url, $args);
	$result['request_duration'] = microtime_float() - $start;

	//Placeholders for video restriction data
	$state_name = $state_reason = '';

	//Got anything?
	if (is_wp_error($response)) {
		$result['log'] .= "Error.\n" . $response->get_error_message();
		//WP doesn't make it easy to distinguish between different internal errors.
		$result['broken'] = true;
		$result['http_code'] = 0;
	} else {
		$result['http_code'] = intval($response['response']['code']);

		switch ($result['http_code']) {
			case 404: //Not found
				$result['log'] .= __('Video Not Found', 'broken-link-checker');
				$result['broken'] = true;
				$result['http_code'] = 0;
				$result['status_text'] = __('Video Not Found', 'broken-link-checker');
				$result['status_code'] = BLC_LINK_STATUS_ERROR;
				break;

			case 403: //Forbidden. Usually means that the video has been removed. Body contains details.
				$result['log'] .= $response['body'];
				$result['broken'] = true;
				$result['http_code'] = 0;
				$result['status_text'] = __('Video Removed', 'broken-link-checker');
				$result['status_code'] = BLC_LINK_STATUS_ERROR;
				break;

			case 400: //Bad request. Usually means that the video ID is incorrect. Body contains details.
				$result['log'] .= $response['body'];
				$result['broken'] = true;
				$result['http_code'] = 0;
				$result['status_text'] = __('Invalid Video ID', 'broken-link-checker');
				$result['status_code'] = BLC_LINK_STATUS_WARNING;
				break;

			case 200: //Video exists, but may be restricted. Check for <yt:state> tags.
				//See http://code.google.com/apis/youtube/2.0/reference.html#youtube_data_api_tag_yt:state

				//Can we count on an XML parser being installed? No, probably not.
				//Back to our makeshift tag "parser" we go.
				$state = blcUtility::extract_tags($response['body'], 'yt:state', false);
				if (empty($state)) {
					//Phew, no restrictions.
					$result['log'] .= __("Video OK", 'broken-link-checker');
					//_x() rather than __() : 'link status' is a translation context, not a text domain.
					$result['status_text'] = _x('OK', 'link status', 'broken-link-checker');
					$result['status_code'] = BLC_LINK_STATUS_OK;
					$result['http_code'] = 0;
				} else {
					//Get the state name and code and append them to the log
					$state = reset($state);
					$state_name = isset($state['attributes']['name']) ? $state['attributes']['name'] : '';
					$state_reason = isset($state['attributes']['reasonCode']) ? $state['attributes']['reasonCode'] : '';

					$result['log'] .= sprintf(
						__('Video status : %s%s', 'broken-link-checker'),
						$state_name,
						$state_reason ? ' [' . $state_reason . ']' : ''
					);

					//A couple of restricted states are not that bad
					$state_ok = ($state_name == 'processing')
						|| ($state_name == 'restricted' && $state_reason == 'limitedSyndication');

					if ($state_ok) {
						$result['broken'] = false;
						$result['status_text'] = _x('OK', 'link status', 'broken-link-checker');
						$result['status_code'] = BLC_LINK_STATUS_OK;
						$result['http_code'] = 0;
					} else {
						$result['broken'] = true;
						$result['status_text'] = __('Video Restricted', 'broken-link-checker');
						$result['status_code'] = BLC_LINK_STATUS_WARNING;
						$result['http_code'] = 0;
					}
				}

				//Add the video title to the log, purely for information.
				//http://code.google.com/apis/youtube/2.0/reference.html#youtube_data_api_tag_media:title
				$title = blcUtility::extract_tags($response['body'], 'media:title', false);
				if (!empty($title)) {
					$result['log'] .= "\n\nTitle : \"" . $title[0]['contents'] . '"';
				}
				break;

			default:
				$result['log'] .= $result['http_code'] . ' ' . $response['response']['message'];
				$result['log'] .= "\n" . __('Unknown YouTube API response received.', 'broken-link-checker');
				break;
		}
	}

	//The hash should contain info about all pieces of data that pertain to determining if the
	//link is working.
	$result['result_hash'] = implode('|', array(
		'youtube',
		$result['http_code'],
		$result['broken'] ? 'broken' : '0',
		$result['timeout'] ? 'timeout' : '0',
		$state_name,
		$state_reason,
	));

	return $result;
}
/**
 * Extract <embed> elements from a HTML string.
 *
 * This function returns an array of <embed> elements found in the input
 * string. Only <embed>'s that are inside <object>'s are considered. Embeds
 * without a 'src' attribute are skipped, and the contents of <code> blocks
 * are ignored.
 *
 * Each array item has the same basic structure as the array items
 * returned by blcUtility::extract_tags(), plus an additional 'wrapper' key
 * that contains similarly structured info about the wrapping <object> tag.
 *
 * @uses blcUtility::extract_tags() This function is a simple wrapper around extract_tags()
 *
 * @param string $html
 * @return array
 */
function extract_embeds($html) {
	$results = array();

	//Remove all <code></code> blocks first. This must operate on $html :
	//the input parameter is the string that gets scanned below.
	$html = preg_replace('/<code[^>]*>.+?<\\/code>/si', ' ', $html);

	//Find likely-looking <object> elements
	$objects = blcUtility::extract_tags($html, 'object', false, true);
	foreach ($objects as $candidate) {
		//Find the <embed> tag
		$embed = blcUtility::extract_tags($candidate['full_tag'], 'embed', false);
		if (empty($embed)) {
			continue;
		}

		//Only the first <embed> found inside the <object> is used.
		$embed = reset($embed);
		if (empty($embed['attributes']['src'])) {
			continue;
		}

		$embed['wrapper'] = $candidate;
		$results[] = $embed;
	}

	return $results;
}
/**
 * Check a YouTube API response that contains a single playlist.
 *
 * Interprets $result['http_code'] (and, for a 200 response, the <yt:state>
 * and <entry> tags in the response body) and fills in 'broken',
 * 'status_text', 'status_code' and 'log' accordingly. A playlist is treated
 * as broken when it is empty or when at least one of its videos is in a
 * state that $this->is_state_ok() rejects.
 *
 * @param array $response API response; 'body' and 'response' => 'message' are read.
 * @param array $result Partial check result; 'http_code' and 'log' must be set. 'broken' is read if present.
 * @return array The updated $result.
 */
protected function check_playlist($response, $result) {
	switch ($result['http_code']) {
		case 404: //Not found
			$result['log'] .= __('Playlist Not Found', 'broken-link-checker');
			$result['broken'] = true;
			$result['http_code'] = 0;
			$result['status_text'] = __('Playlist Not Found', 'broken-link-checker');
			$result['status_code'] = BLC_LINK_STATUS_ERROR;
			break;

		case 403: //Forbidden. We're unlikely to see this code for playlists, but lets allow it.
			$result['log'] .= $response['body'];
			$result['broken'] = true;
			$result['status_text'] = __('Playlist Restricted', 'broken-link-checker');
			$result['status_code'] = BLC_LINK_STATUS_ERROR;
			break;

		case 400: //Bad request. Probably indicates a client error (invalid API request). Body contains details.
			$result['log'] .= $response['body'];
			$result['broken'] = true;
			$result['status_text'] = __('Invalid Playlist', 'broken-link-checker');
			$result['status_code'] = BLC_LINK_STATUS_WARNING;
			break;

		case 200:
			//The playlist exists, but some of the videos may be restricted.
			//Check for <yt:state> tags.
			$video_states = blcUtility::extract_tags($response['body'], 'yt:state', false);

			if (empty($video_states)) {
				//No restrictions. Does the playlist have any entries?
				$entries = blcUtility::extract_tags($response['body'], 'entry', false);
				if (!empty($entries)) {
					//All is well.
					$result['log'] .= __("Playlist OK", 'broken-link-checker');
					//_x() rather than __() : 'link status' is a translation context, not a text domain.
					$result['status_text'] = _x('OK', 'link status', 'broken-link-checker');
					$result['status_code'] = BLC_LINK_STATUS_OK;
					$result['http_code'] = 0;
				} else {
					//An empty playlist. It is possible that all of the videos
					//have been deleted. Treat it as a warning.
					$result['log'] .= __("This playlist has no entries or all entries have been deleted.", 'broken-link-checker');
					$result['status_text'] = _x('Empty Playlist', 'link status', 'broken-link-checker');
					$result['status_code'] = BLC_LINK_STATUS_WARNING;
					$result['http_code'] = 0;
					$result['broken'] = true;
				}
			} else {
				//Treat the playlist as broken if at least one video is inaccessible.
				foreach ($video_states as $state) {
					$state_name = isset($state['attributes']['name']) ? $state['attributes']['name'] : '';
					$state_reason = isset($state['attributes']['reasonCode']) ? $state['attributes']['reasonCode'] : '';

					if (!$this->is_state_ok($state_name, $state_reason)) {
						$result['log'] .= sprintf(
							__('Video status : %s%s', 'broken-link-checker'),
							$state_name,
							$state_reason ? ' [' . $state_reason . ']' : ''
						);

						$result['state_name'] = $state_name;
						$result['state_reason'] = $state_reason;
						$result['broken'] = true;
						$result['status_text'] = __('Video Restricted', 'broken-link-checker');
						$result['status_code'] = BLC_LINK_STATUS_WARNING;
						$result['http_code'] = 0;

						//One inaccessible video is enough; stop at the first one.
						break;
					}
				}

				//empty() also covers a caller that did not pre-set 'broken'.
				if (empty($result['broken'])) {
					$result['status_text'] = _x('OK', 'link status', 'broken-link-checker');
					$result['status_code'] = BLC_LINK_STATUS_OK;
					$result['http_code'] = 0;
				}
			}

			//Add the playlist title to the log, purely for information.
			$title = blcUtility::extract_tags($response['body'], 'title', false);
			if (!empty($title)) {
				$result['log'] .= "\n\nPlaylist title : \"" . $title[0]['contents'] . '"';
			}
			break;

		default:
			$result['log'] .= $result['http_code'] . ' ' . $response['response']['message'];
			$result['log'] .= "\n" . __('Unknown YouTube API response received.', 'broken-link-checker');
			break;
	}

	return $result;
}
/**
 * Run a callback on every HTML link (<a> tag) found in a string and collect the return values.
 *
 * For each link the callback receives an array built from the tag's attributes
 * (attr_name => attr_value, as returned by blcUtility::extract_tags()), with
 * these keys added or overwritten :
 *  'href' - the link's 'href' attribute, or an empty string if the tag has none.
 *  '#raw' - the raw link code, i.e. the tag's 'full_tag' value.
 *  '#offset' - the tag's 'offset' value as reported by extract_tags().
 *  '#link_text' - the tag's 'contents' value (the anchor text; may contain HTML tags).
 *
 * @see blcParser::map()
 *
 * @param string $content A text string to parse for links.
 * @param callback $callback Callback function to apply to all found links.
 * @param mixed $extra Optional. When not null, it is passed to $callback as the second argument.
 * @return array The value returned by $callback for each detected link, in the order the links were found.
 */
function map($content, $callback, $extra = null) {
	$mapped = array();

	$anchors = blcUtility::extract_tags($content, 'a', false, true);
	foreach ($anchors as $anchor) {
		$attributes = $anchor['attributes'];

		//Special keys are merged last so that they win over same-named attributes.
		if (isset($attributes['href'])) {
			$href = $attributes['href'];
		} else {
			$href = '';
		}
		$link_data = array_merge(
			$attributes,
			array(
				'#raw' => $anchor['full_tag'],
				'#offset' => $anchor['offset'],
				'#link_text' => $anchor['contents'],
				'href' => $href,
			)
		);

		//The callback only gets a second argument when $extra was supplied.
		$arguments = ($extra === null) ? array($link_data) : array($link_data, $extra);

		$mapped[] = call_user_func_array($callback, $arguments);
	}

	return $mapped;
}