Esempio n. 1
0
/**
 * Describe a recorded request without replaying it.
 *
 * Reads the <get>, <post>, <xpath> and <data> child elements and the "src",
 * "referer" and "method" attributes of the given node and returns them in an
 * array. When requested and the Tidy extension is loaded, the recorded <data>
 * markup is also run through Tidy and its diagnostics are reported.
 *
 * @param DOMElement $url      node describing one recorded request (must offer
 *                             getElementsByTagName() and getAttribute())
 * @param bool       $use_tidy whether to report Tidy diagnostics for the recorded data
 * @return array keys: data, url, xpath, method, post, get, referer; plus
 *               ref_error_count and ref_error_msg when Tidy diagnostics ran,
 *               or ref_error_msg alone when the Tidy extension is not loaded
 */
function get_url($url, $use_tidy = TRUE)
{
    $result = array();
    $get = get_from_dom($url->getElementsByTagName('get')->item(0));
    $post = get_from_dom($url->getElementsByTagName('post')->item(0));

    // item(0) is null when the child element is absent; keep reporting null in
    // that case instead of dereferencing a non-object.
    $xpath_node = $url->getElementsByTagName('xpath')->item(0);
    $xpath = $xpath_node ? $xpath_node->textContent : null;
    $data_node = $url->getElementsByTagName('data')->item(0);
    $data = $data_node ? $data_node->textContent : null;

    $urlstr = $url->getAttribute("src");
    $referer = $url->getAttribute("referer");

    // The raw recorded markup is returned as-is; Tidy only contributes diagnostics.
    $result['data'] = $data;
    if (!extension_loaded("tidy")) {
        $result['ref_error_msg'] = tra("Tidy Extension not present");
    } elseif ($use_tidy) {
        // Only pay for parsing/diagnosing when the caller wants the report.
        $tidy = tidy_parse_string((string) $data, array(), 'utf8');
        tidy_diagnose($tidy);
        $result['ref_error_count'] = tidy_error_count($tidy);
        $result['ref_error_msg'] = tidy_get_error_buffer($tidy);
    }

    $result['url'] = $urlstr;
    $result['xpath'] = $xpath;
    $result['method'] = $url->getAttribute("method");
    $result['post'] = $post;
    $result['get'] = $get;
    $result['referer'] = $referer;
    return $result;
}
Esempio n. 2
0
/**
 * Replay a recorded request and diff the live response against the recorded one.
 *
 * The request described by $url ("src", "referer" and "method" attributes plus
 * the <post> child) is sent with the http extension when loaded, otherwise
 * with curl. Cookies received in the response are stored in the global
 * $cookies so that later replays send them back. The recorded <data> markup
 * and the live response are normalised (Tidy, or HTML Purifier when Tidy is
 * not loaded), optionally reduced to the nodes matched by the <xpath> child,
 * and compared with diff2() in "htmldiff" mode.
 *
 * Side effects: calls session_write_close() before the request and updates
 * the global $cookies.
 *
 * @param DOMElement $url      node describing one recorded request (must offer
 *                             getElementsByTagName() and getAttribute())
 * @param bool       $use_tidy whether to report Tidy diagnostics for both documents
 * @return array keys: html (the rendered diff), url, method; post for POST
 *               requests; ref_error_count / ref_error_msg and
 *               replay_error_count / replay_error_msg when Tidy diagnostics
 *               ran, or only the two *_error_msg keys when Tidy is not loaded
 */
function verif_url($url, $use_tidy = TRUE)
{
    global $cookies;
    static $purifier;
    static $loaded = false;

    $result = array();
    $post = get_from_dom($url->getElementsByTagName('post')->item(0));

    // item(0) is null when the child element is absent.
    $xpath_node = $url->getElementsByTagName('xpath')->item(0);
    $xpath = $xpath_node ? $xpath_node->textContent : '';
    $data_node = $url->getElementsByTagName('data')->item(0);
    $data = $data_node ? $data_node->textContent : '';

    $urlstr = $url->getAttribute('src');
    $method = strtolower($url->getAttribute('method'));

    // Stays empty when no HTTP client extension is loaded or the request fails.
    $buffer = '';

    if (extension_loaded('http')) {
        $options = array();
        $options['timeout'] = 2;
        $options['connecttimeout'] = 2;
        $options['url'] = $url->getAttribute('src');
        $options['referer'] = $url->getAttribute('referer');
        $options['redirect'] = 0;
        $options['cookies'] = $cookies;
        // NOTE(review): this temporary cookie store file is never removed.
        $options['cookiestore'] = tempnam('/tmp/', 'tiki-tests');
        // Close the session to avoid timeout
        session_write_close();
        switch ($method) {
            case 'get':
                $buffer = http_get($urlstr, $options, $info);
                break;
            case 'post':
                $buffer = http_post_fields($urlstr, $post, NULL, $options, $info);
                break;
        }
        $headers = http_parse_headers($buffer);
        if (isset($headers['Set-Cookie'])) {
            // A header that occurs once is reported as a string, not an array.
            foreach ((array) $headers['Set-Cookie'] as $c) {
                TikiLib::parse_str($c, $cookies);
            }
        }
        $message = http_parse_message($buffer);
        $buffer = is_object($message) ? $message->body : '';
    } elseif (extension_loaded('curl')) {
        // Normalise before iterating: $cookies is unset on the first replay.
        if (!is_array($cookies)) {
            $cookies = array();
        }
        $curl = curl_init();
        curl_setopt($curl, CURLOPT_URL, $urlstr);
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 2);
        curl_setopt($curl, CURLOPT_TIMEOUT, 2);
        // NOTE(review): certificate verification is disabled for the replay.
        curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
        // Headers are kept in the response so Set-Cookie can be read below.
        curl_setopt($curl, CURLOPT_HEADER, true);
        curl_setopt($curl, CURLOPT_REFERER, $url->getAttribute('referer'));
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, false);
        curl_setopt($curl, CURLOPT_USERAGENT, 'TikiTest');
        // We deal with the cookies: a Cookie request header is a plain list of
        // "name=value" pairs separated by "; " (no path or other attributes).
        $cookie_pairs = array();
        foreach ($cookies as $name => $value) {
            $cookie_pairs[] = "{$name}={$value}";
        }
        curl_setopt($curl, CURLOPT_COOKIE, implode('; ', $cookie_pairs));
        switch ($method) {
            case 'get':
                curl_setopt($curl, CURLOPT_HTTPGET, true);
                break;
            case 'post':
                curl_setopt($curl, CURLOPT_POST, true);
                // URL-encode names and values so '&', '=' or spaces in the
                // recorded fields cannot corrupt the request body.
                $post_string = is_array($post) ? http_build_query($post, '', '&') : '';
                curl_setopt($curl, CURLOPT_POSTFIELDS, $post_string);
                break;
        }
        // Close the session to avoid timeout
        session_write_close();
        $http_response = curl_exec($curl);
        if ($http_response === false) {
            // Transfer failed (timeout, unreachable host, ...): empty response.
            $header = '';
            $body = '';
        } else {
            $header_size = curl_getinfo($curl, CURLINFO_HEADER_SIZE);
            $header = (string) substr($http_response, 0, $header_size);
            $body = (string) substr($http_response, $header_size);
        }
        // NOTE(review): only matches Set-Cookie lines that contain a ';'.
        preg_match_all('|Set-Cookie: (.*);|U', $header, $cookies_array);
        $cookies_tmp = '';
        foreach ($cookies_array[1] as $c) {
            $cookies_tmp .= "&{$c}";
        }
        $received_cookies = array();
        TikiLib::parse_str($cookies_tmp, $received_cookies);
        if (is_array($received_cookies)) {
            $cookies = array_merge($cookies, $received_cookies);
        }
        $buffer = $body;
        curl_close($curl);
    }

    if (extension_loaded('tidy')) {
        $data = tidy_parse_string($data, array(), 'utf8');
        $buffer = tidy_parse_string($buffer, array(), 'utf8');
        if ($use_tidy) {
            tidy_diagnose($data);
            $result['ref_error_count'] = tidy_error_count($data);
            $result['ref_error_msg'] = tidy_get_error_buffer($data);
            tidy_diagnose($buffer);
            $result['replay_error_count'] = tidy_error_count($buffer);
            $result['replay_error_msg'] = tidy_get_error_buffer($buffer);
        }
    } else {
        // Without Tidy, fall back to HTML Purifier; it is set up once per request.
        if (!$loaded) {
            require_once 'lib/htmlpurifier_tiki/HTMLPurifier.tiki.php';
            $config = getHTMLPurifierTikiConfig();
            $purifier = new HTMLPurifier($config);
            $loaded = true;
        }
        if ($purifier) {
            $data = '<html><body>' . $purifier->purify($data) . '</body></html>';
            $buffer = '<html><body>' . $purifier->purify($buffer) . '</body></html>';
        }
        $result['ref_error_msg'] = tra('The Tidy extension is not present');
        $result['replay_error_msg'] = tra('The Tidy extension is not present');
    }

    // If we have a XPath then we extract the new DOM and print it in HTML
    $has_xpath = trim($xpath) != '';
    if ($has_xpath) {
        $data = verif_url_extract_xpath($data, $xpath);
        $buffer = verif_url_extract_xpath($buffer, $xpath);
    }

    $tmp = diff2($data, $buffer, "htmldiff");
    // Swap the document wrapper tags for a scrollable <div> so the diff can be embedded.
    if ($has_xpath) {
        $result['html'] = preg_replace(array("/<html>/", "/<\\/html>/"), array("<div style='overflow: auto; width:500px; text-align: center'> ", "</div>"), $tmp);
    } else {
        $result['html'] = preg_replace(array("/<html.*<body/U", "/<\\/body><\\/html>/U"), array("<div style='overflow: auto; width:500px; text-align: center' ", "</div>"), $tmp);
    }

    $result['url'] = $urlstr;
    $result['method'] = $url->getAttribute('method');
    if (strtolower($result['method']) == 'post') {
        $result['post'] = $post;
    }
    return $result;
}

/**
 * Reduce an HTML document to the nodes matched by an XPath expression.
 *
 * Private helper of verif_url(). The matched nodes are deep-copied into a new
 * document, inside two nested <html> elements, and serialised with saveHTML().
 *
 * @param mixed  $html  HTML markup, or an object that converts to it as a string
 * @param string $xpath XPath expression selecting the nodes to keep
 * @return string serialised HTML of the new document
 */
function verif_url_extract_xpath($html, $xpath)
{
    $source = new DOMDocument();
    $html = (string) $html;
    // loadHTML() rejects an empty string; an empty document simply matches nothing.
    if ($html !== '') {
        $source->loadHTML($html);
    }
    $finder = new DOMXPath($source);
    $matches = $finder->query($xpath);

    $extract = new DOMDocument('1.0');
    $root = $extract->appendChild($extract->createElement('html'));
    // NOTE(review): the inner wrapper is a second <html> element rather than
    // <body>; kept as-is because verif_url() rewrites every <html>/</html> tag
    // of the XPath output into a <div>.
    $wrapper = $root->appendChild($extract->createElement('html'));
    // query() yields false for an invalid expression; then nothing is copied.
    if ($matches !== false) {
        foreach ($matches as $match) {
            $wrapper->appendChild($extract->importNode($match, TRUE));
        }
    }
    return $extract->saveHTML();
}