/**
 * Get a remote page title
 *
 * This function returns a string: either the page title as defined in HTML, or the URL if not found.
 * The function tries to convert funky characters found in titles to UTF-8, from the detected charset.
 * Charset in use is guessed from the HTML meta tag, or if not found, from the server's 'content-type' response.
 *
 * The URL (sanitized) is returned instead of a title when:
 *  - the protocol is not http:// or https://
 *  - the HTTP request returned an error string
 *  - the response body is empty
 *  - no non-empty <title> element is found
 *
 * Plugins can bypass everything with the 'shunt_get_remote_title' filter: any value other than
 * false returned by that filter is returned as is.
 *
 * @param string $url URL
 * @return string Title (sanitized) or the URL if no title found
 */
function yourls_get_remote_title($url)
{
    // Allow plugins to short-circuit the whole function
    $pre = yourls_apply_filter('shunt_get_remote_title', false, $url);
    if (false !== $pre) {
        return $pre;
    }

    $url = yourls_sanitize_url($url);

    // Only deal with http(s)://
    if (!in_array(yourls_get_protocol($url), array('http://', 'https://'), true)) {
        return $url;
    }

    $title = $charset = false;

    $response = yourls_http_get($url); // can be a Request object or an error string
    if (is_string($response)) {
        return $url;
    }

    // Page content. No content? Return the URL
    $content = $response->body;
    if (!$content) {
        return $url;
    }

    // Look for <title>, allowing attributes on the tag (eg <title data-rh="true">) and
    // whitespace before the closing bracket. No title found? Return the URL
    if (preg_match('/<title(?:\\s[^>]*)?>(.*?)<\\/title\\s*>/is', $content, $found)) {
        $title = $found[1];
    }
    // Explicit comparison instead of a truthiness test, so that a page titled "0" keeps its title
    if (false === $title || '' === $title) {
        return $url;
    }

    // Now we have a title. We'll try to get proper utf8 from it.

    // Get charset as (and if) defined by the HTML meta tag. We should match
    // <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
    // or <meta charset='utf-8'> and all possible variations: see https://gist.github.com/ozh/7951236
    if (preg_match('/<meta[^>]*charset\\s*=["\' ]*([a-zA-Z0-9\\-_]+)/is', $content, $found)) {
        $charset = $found[1];
    } else {
        // No charset found in HTML. Get charset as (and if) defined by the server response.
        // current() needs an array: guard against the header lookup returning anything else
        // (eg when the server sent no content-type header at all)
        $header_values = $response->headers->getValues('content-type');
        $_charset = is_array($header_values) ? (string) current($header_values) : '';

        // Parameter names are case insensitive and the value may be quoted: charset="utf-8"
        if (preg_match('/charset=(\\S+)/i', $_charset, $found)) {
            $charset = trim($found[1], ';"\'');
        }
    }

    // Conversion to utf-8 if what we have is not utf8 already
    if (strtolower((string) $charset) !== 'utf-8' && function_exists('mb_convert_encoding')) {
        // We use @ to remove warnings because mb_ functions easily complain about illegal chars.
        // The charset name comes from the remote server and cannot be trusted: as of PHP 8 an
        // unknown encoding name throws a ValueError that @ does not silence, so catch it too.
        try {
            $converted = $charset
                ? @mb_convert_encoding($title, 'UTF-8', $charset)
                : @mb_convert_encoding($title, 'UTF-8');
        } catch (\ValueError $e) {
            $converted = false;
        }

        // Keep the raw title if the conversion failed, rather than replacing it with nothing
        if (is_string($converted)) {
            $title = $converted;
        }
    }

    // Remove HTML entities
    $title = html_entity_decode($title, ENT_QUOTES, 'UTF-8');

    // Strip out evil things
    $title = yourls_sanitize_title($title);

    return yourls_apply_filter('get_remote_title', $title, $url);
}
/**
 * Perform a GET request and hand back only the response body
 *
 * Thin wrapper around yourls_http_get(): every argument is forwarded untouched, and
 * only the `body` property of whatever that call returns is kept.
 *
 * @since 1.7
 * @see yourls_http_request
 * @param mixed $url     Forwarded as is to yourls_http_get()
 * @param mixed $headers Forwarded as is to yourls_http_get(), defaults to an empty array
 * @param mixed $data    Forwarded as is to yourls_http_get(), defaults to an empty array
 * @param mixed $options Forwarded as is to yourls_http_get(), defaults to an empty array
 * @return mixed String (page body) or null if error
 */
function yourls_http_get_body($url, $headers = array(), $data = array(), $options = array())
{
    $response = yourls_http_get($url, $headers, $data, $options);

    // Nothing usable came back (no `body` property set on the result): report null
    if (!isset($response->body)) {
        return null;
    }

    return $response->body;
}