Exemplo n.º 1
0
/**
 * Get a remote page title
 *
 * Fetches $url over HTTP(S) and returns the content of its <title> element, converted to UTF-8,
 * with HTML entities decoded and passed through yourls_sanitize_title().
 *
 * The (sanitized) URL is returned instead when the protocol is not http(s), when the request
 * fails, when the response body is empty, or when no non-empty <title> is found.
 *
 * The source charset is taken from the HTML meta tag, or if not found, from the server's
 * 'content-type' response header. If that charset is missing or not supported by mbstring,
 * the title is left as received rather than being discarded.
 *
 * Plugins can replace the whole lookup with the 'shunt_get_remote_title' filter (any value other
 * than false is returned as is) and can alter the final result with 'get_remote_title'.
 *
 * @param string $url URL
 * @return string Title (sanitized) or the URL if no title found
 */
function yourls_get_remote_title($url)
{
    // Allow plugins to short-circuit the whole function
    $pre = yourls_apply_filter('shunt_get_remote_title', false, $url);
    if (false !== $pre) {
        return $pre;
    }
    $url = yourls_sanitize_url($url);
    // Only deal with http(s)://
    if (!in_array(yourls_get_protocol($url), array('http://', 'https://'), true)) {
        return $url;
    }
    $title = $charset = false;
    $response = yourls_http_get($url);
    // can be a Request object or an error string
    if (is_string($response)) {
        return $url;
    }
    // Page content. No content? Return the URL
    $content = $response->body;
    if (!$content) {
        return $url;
    }
    // Look for <title>, allowing attributes on the tag (e.g. <title data-react-helmet="true">).
    // No title found? Return the URL
    if (preg_match('/<title(?:\\s[^>]*)?>(.*?)<\\/title>/is', $content, $found)) {
        $title = $found[1];
        unset($found);
    }
    if (!$title) {
        return $url;
    }
    // Now we have a title. We'll try to get proper utf8 from it.
    // Get charset as (and if) defined by the HTML meta tag. We should match
    // <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
    // or <meta charset='utf-8'> and all possible variations: see https://gist.github.com/ozh/7951236
    if (preg_match('/<meta[^>]*charset\\s*=["\' ]*([a-zA-Z0-9\\-_]+)/is', $content, $found)) {
        $charset = $found[1];
        unset($found);
    } else {
        // No charset found in HTML. Get charset as (and if) defined by the server response.
        // getValues() may hand back null when the header is absent, so never feed it to current() blindly.
        $header_values = $response->headers->getValues('content-type');
        $content_type = is_array($header_values) ? (string) current($header_values) : '';
        if (preg_match('/charset=(\\S+)/i', $content_type, $found)) {
            // The value may be quoted and/or followed by ';' (e.g. charset="utf-8";)
            $charset = trim($found[1], ';"\'');
            unset($found);
        }
    }
    // Conversion to utf-8 if what we have is not utf8 already
    if (function_exists('mb_convert_encoding') && strtolower((string) $charset) !== 'utf-8') {
        try {
            // @ silences the warnings mb_ functions emit about illegal characters or encodings
            if ($charset) {
                $converted = @mb_convert_encoding($title, 'UTF-8', $charset);
            } else {
                $converted = @mb_convert_encoding($title, 'UTF-8');
            }
        } catch (\ValueError $e) {
            // PHP 8+: an unknown charset name (it comes from the remote page) throws instead of warning
            $converted = false;
        }
        // On failure keep the title as received instead of replacing it with false
        if (is_string($converted)) {
            $title = $converted;
        }
    }
    // Remove HTML entities
    $title = html_entity_decode($title, ENT_QUOTES, 'UTF-8');
    // Strip out evil things
    $title = yourls_sanitize_title($title);
    return yourls_apply_filter('get_remote_title', $title, $url);
}
Exemplo n.º 2
0
/**
 * Perform a GET request and return only the response body
 *
 * Thin wrapper around yourls_http_get(): all arguments are forwarded unchanged, and the
 * 'body' property of whatever it returns is handed back. When the result has no 'body'
 * property set (for instance when the request failed), null is returned instead.
 *
 * @since 1.7
 * @see yourls_http_request
 * @param string $url     URL to request
 * @param array  $headers Forwarded to yourls_http_get()
 * @param array  $data    Forwarded to yourls_http_get()
 * @param array  $options Forwarded to yourls_http_get()
 * @return mixed String (page body) or null if error
 */
function yourls_http_get_body($url, $headers = array(), $data = array(), $options = array())
{
    $response = yourls_http_get($url, $headers, $data, $options);

    // No usable body on the result: signal the failure with null
    if (!isset($response->body)) {
        return null;
    }

    return $response->body;
}