Example #1
0
/**
 * Collects the recorded reference data of one test URL element.
 *
 * Reads the <get>, <post>, <xpath> and <data> children and the "src",
 * "referer" and "method" attributes of $url and returns them in one array.
 * When the tidy extension is loaded and $use_tidy is true, the recorded page
 * is additionally run through tidy and its diagnostics are reported.
 *
 * @param DOMElement $url      element describing the recorded request
 *                             (accessed via getElementsByTagName()/getAttribute())
 * @param bool       $use_tidy whether tidy diagnostics should be added to the result
 * @return array keys: data, url, xpath, method, post, get, referer, plus
 *               ref_error_count / ref_error_msg when tidy diagnostics ran,
 *               or ref_error_msg alone when the tidy extension is missing
 */
function get_url($url, $use_tidy = TRUE)
{
    $result = array();
    $get = get_from_dom($url->getElementsByTagName('get')->item(0));
    $post = get_from_dom($url->getElementsByTagName('post')->item(0));

    // item(0) is null when the child element is absent; fall back to an
    // empty string instead of reading a property of null.
    $xpath_node = $url->getElementsByTagName('xpath')->item(0);
    $xpath = $xpath_node ? $xpath_node->textContent : '';
    $data_node = $url->getElementsByTagName('data')->item(0);
    $data = $data_node ? $data_node->textContent : '';

    // The raw recorded page is returned untouched; tidy only works on a copy.
    $result['data'] = $data;

    if (!extension_loaded("tidy")) {
        $result['ref_error_msg'] = tra("Tidy Extension not present");
    } elseif ($use_tidy) {
        // Parsing is only worth doing when the diagnostics are requested.
        $tidy = tidy_parse_string($data, array(), 'utf8');
        if ($tidy !== false) {
            tidy_diagnose($tidy);
            $result['ref_error_count'] = tidy_error_count($tidy);
            $result['ref_error_msg'] = tidy_get_error_buffer($tidy);
        }
    }

    $result['url'] = $url->getAttribute("src");
    $result['xpath'] = $xpath;
    $result['method'] = $url->getAttribute("method");
    $result['post'] = $post;
    $result['get'] = $get;
    $result['referer'] = $url->getAttribute("referer");
    return $result;
}
/**
 * Renders the recorded page of a test URL with every element matched by an
 * XPath expression highlighted with a red background.
 *
 * The recorded HTML is taken from the <data> child of $url, cleaned with tidy
 * when that extension is loaded (HTMLPurifier otherwise), loaded into a DOM,
 * given a <base href> pointing at the global $base_url, and serialized back
 * to HTML after the matched elements received an inline style.
 *
 * @param DOMElement $url   element holding the recorded page in a <data> child
 * @param string     $xpath XPath expression selecting the nodes to highlight
 * @return string the highlighted HTML, or a translated message when the
 *                recorded page is empty
 */
function enlight_xpath($url, $xpath)
{
	global $base_url;
	static $purifier;
	static $loaded = false;

	// item(0) is null when there is no <data> child; treat that as an empty page.
	$data_node = $url->getElementsByTagName('data')->item(0);
	$data = $data_node ? $data_node->textContent : '';
	if (trim($data) == '') {
		return tra('The page is empty');
	}

	if (extension_loaded('tidy')) {
		$tidy = tidy_parse_string($data, array(), 'utf8');
		if ($tidy !== false) {
			tidy_diagnose($tidy);
			// DOMDocument::loadHTML() needs the markup as a string, not the tidy object.
			$data = tidy_get_output($tidy);
		}
	} else {
		// The purifier is built once and reused across calls.
		if (!$loaded) {
			require_once('lib/htmlpurifier_tiki/HTMLPurifier.tiki.php');
			$config = getHTMLPurifierTikiConfig();
			$config->set('Attr.EnableID', true);
			$purifier = new HTMLPurifier($config);
			$loaded = true;
		}
		if ($purifier) {
			$data = '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body>'.$purifier->purify($data).'</body></html>';
		}
	}

	// loadHTML() is an instance method; calling it statically is deprecated
	// in PHP 7 and an Error in PHP 8.
	$dom_ref = new DOMDocument();
	if (!$dom_ref->loadHTML($data)) {
		// Nothing could be parsed: hand back the cleaned markup without highlighting.
		return $data;
	}
	$xp_ref = new DOMXPath($dom_ref);

	// Insert <base> as the first child of <head>, when the document has one.
	$heads = $xp_ref->query('//head');
	if ($heads !== false && $heads->length > 0) {
		$head = $heads->item(0);
		$base = $dom_ref->createElement('base');
		$base->setAttribute('href', (string) $base_url);
		$head->insertBefore($base, $head->firstChild);
	}

	// query() returns false for an invalid expression; only elements can
	// carry a style attribute, so text/attribute matches are skipped.
	$matches = $xp_ref->query($xpath);
	if ($matches !== false) {
		foreach ($matches as $ref) {
			if ($ref instanceof DOMElement) {
				$ref->setAttribute('style', 'background-color: red;');
			}
		}
	}

	return $dom_ref->saveHTML();
}
Example #3
0
/**
 * Replays one recorded request and diffs the live response against the
 * recorded page.
 *
 * The request described by $url ("src", "referer" and "method" attributes,
 * <post> child) is sent with the pecl "http" extension when loaded, with curl
 * otherwise. Cookies returned by the server are merged into the global
 * $cookies so that following requests can reuse them. Both the recorded page
 * (<data> child) and the response body are cleaned with tidy (or HTMLPurifier
 * when tidy is missing), optionally reduced to the nodes matched by the
 * <xpath> child, and compared with diff2() in "htmldiff" mode.
 *
 * @param DOMElement $url      element describing the recorded request
 * @param bool       $use_tidy whether tidy diagnostics are added to the result
 * @return array keys: html, url, method, post (POST requests only) and the
 *               ref_/replay_ error_count / error_msg diagnostics
 */
function verif_url($url, $use_tidy = TRUE)
{
    global $cookies;
    static $purifier;
    static $loaded = false;

    $result = array();
    $post = get_from_dom($url->getElementsByTagName('post')->item(0));

    // item(0) is null when the child element is absent.
    $xpath_node = $url->getElementsByTagName('xpath')->item(0);
    $xpath = $xpath_node ? $xpath_node->textContent : '';
    $data_node = $url->getElementsByTagName('data')->item(0);
    $data = $data_node ? $data_node->textContent : '';

    $urlstr = $url->getAttribute('src');
    $method = strtolower($url->getAttribute('method'));

    // The global may not have been initialised yet; it is iterated and merged below.
    if (!is_array($cookies)) {
        $cookies = array();
    }

    // Stays empty when no request could be made (unknown method, no HTTP
    // extension available, transfer failure).
    $buffer = '';

    if (extension_loaded('http')) {
        $options = array(
            'timeout' => 2,
            'connecttimeout' => 2,
            'url' => $urlstr,
            'referer' => $url->getAttribute('referer'),
            'redirect' => 0,
            'cookies' => $cookies,
            'cookiestore' => tempnam('/tmp/', 'tiki-tests'),
        );
        // Close the session to avoid timeout
        session_write_close();
        switch ($method) {
            case 'get':
                $buffer = http_get($urlstr, $options, $info);
                break;
            case 'post':
                $buffer = http_post_fields($urlstr, $post, NULL, $options, $info);
                break;
        }
        if (is_string($buffer) && $buffer !== '') {
            $headers = http_parse_headers($buffer);
            if (is_array($headers) && isset($headers['Set-Cookie'])) {
                // A header sent once is a string, a repeated header an array.
                foreach ((array) $headers['Set-Cookie'] as $c) {
                    TikiLib::parse_str($c, $cookies);
                }
            }
            $message = http_parse_message($buffer);
            $buffer = $message ? $message->body : '';
        } else {
            $buffer = '';
        }
    } elseif (extension_loaded('curl')) {
        $curl = curl_init();
        curl_setopt($curl, CURLOPT_URL, $urlstr);
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 2);
        curl_setopt($curl, CURLOPT_TIMEOUT, 2);
        // NOTE(review): certificate verification is disabled here; acceptable
        // only if this is used against trusted test hosts - confirm.
        curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
        // Headers are kept in the response so the cookies can be extracted.
        curl_setopt($curl, CURLOPT_HEADER, true);
        curl_setopt($curl, CURLOPT_REFERER, $url->getAttribute('referer'));
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, false);
        curl_setopt($curl, CURLOPT_USERAGENT, 'TikiTest');
        // We deal with the cookies
        $cookies_string = '';
        foreach ($cookies as $c => $v) {
            $cookies_string .= "{$c}={$v}; path=/;";
        }
        curl_setopt($curl, CURLOPT_COOKIE, $cookies_string);
        switch ($method) {
            case 'get':
                curl_setopt($curl, CURLOPT_HTTPGET, true);
                break;
            case 'post':
                curl_setopt($curl, CURLOPT_POST, true);
                // URL-encode names and values, as the http extension branch
                // does when it is handed the same array.
                $post_string = is_array($post) ? http_build_query($post, '', '&') : '';
                curl_setopt($curl, CURLOPT_POSTFIELDS, $post_string);
                break;
        }
        // Close the session to avoid timeout
        session_write_close();
        $http_response = curl_exec($curl);
        if ($http_response === false) {
            // Transfer failed (timeout, DNS, ...): nothing to split.
            $header = '';
            $response_body = '';
        } else {
            $header_size = curl_getinfo($curl, CURLINFO_HEADER_SIZE);
            $header = (string) substr($http_response, 0, $header_size);
            $response_body = (string) substr($http_response, $header_size);
        }
        curl_close($curl);

        // Collect the name=value part of every Set-Cookie header. The match
        // is case-insensitive (HTTP/2 header names are lower-case) and does
        // not require attributes after the value.
        $cookies_tmp = '';
        preg_match_all('/^Set-Cookie:[ \t]*([^;\r\n]*)/mi', $header, $cookies_array);
        foreach ($cookies_array[1] as $c) {
            $cookies_tmp .= "&{$c}";
        }
        $cookies_titi = array();
        TikiLib::parse_str($cookies_tmp, $cookies_titi);
        if (is_array($cookies_titi)) {
            $cookies = array_merge($cookies, $cookies_titi);
        }
        $buffer = $response_body;
    }

    if (extension_loaded('tidy')) {
        $data = tidy_parse_string($data, array(), 'utf8');
        $buffer = tidy_parse_string($buffer, array(), 'utf8');
        if ($use_tidy && $data !== false && $buffer !== false) {
            tidy_diagnose($data);
            $result['ref_error_count'] = tidy_error_count($data);
            $result['ref_error_msg'] = tidy_get_error_buffer($data);
            tidy_diagnose($buffer);
            $result['replay_error_count'] = tidy_error_count($buffer);
            $result['replay_error_msg'] = tidy_get_error_buffer($buffer);
        }
    } else {
        // The purifier is built once and reused across calls.
        if (!$loaded) {
            require_once 'lib/htmlpurifier_tiki/HTMLPurifier.tiki.php';
            $config = getHTMLPurifierTikiConfig();
            $purifier = new HTMLPurifier($config);
            $loaded = true;
        }
        if ($purifier) {
            $data = '<html><body>' . $purifier->purify($data) . '</body></html>';
            $buffer = '<html><body>' . $purifier->purify($buffer) . '</body></html>';
        }
        $result['ref_error_msg'] = tra('The Tidy extension is not present');
        $result['replay_error_msg'] = tra('The Tidy extension is not present');
    }

    // If we have a XPath then we extract the new DOM and print it in HTML
    $has_xpath = trim($xpath) != '';
    if ($has_xpath) {
        // The string cast turns a tidy document into its HTML output.
        $data = verif_url_extract_xpath((string) $data, $xpath);
        $buffer = verif_url_extract_xpath((string) $buffer, $xpath);
    }

    $tmp = diff2($data, $buffer, "htmldiff");
    if ($has_xpath) {
        $result['html'] = preg_replace(array("/<html>/", "/<\\/html>/"), array("<div style='overflow: auto; width:500px; text-align: center'> ", "</div>"), $tmp);
    } else {
        $result['html'] = preg_replace(array("/<html.*<body/U", "/<\\/body><\\/html>/U"), array("<div style='overflow: auto; width:500px; text-align: center' ", "</div>"), $tmp);
    }
    $result['url'] = $urlstr;
    $result['method'] = $url->getAttribute('method');
    if ($method == 'post') {
        $result['post'] = $post;
    }
    return $result;
}

/**
 * Private helper of verif_url(): returns an HTML document that contains only
 * the nodes of $html matched by $xpath.
 *
 * @param string $html  markup to search
 * @param string $xpath XPath expression selecting the nodes to keep
 * @return string serialized document; the wrappers are empty when $html
 *                cannot be parsed or the expression is invalid
 */
function verif_url_extract_xpath($html, $xpath)
{
    $extract = new DOMDocument('1.0');
    $root = $extract->appendChild($extract->createElement('html'));
    // The inner wrapper is a second <html> element, as in the original code.
    // verif_url() rewrites every <html>/</html> tag of the diff into a <div>,
    // so the element name is kept - NOTE(review): 'body' may have been intended.
    $wrapper = $root->appendChild($extract->createElement('html'));

    if (trim($html) == '') {
        return $extract->saveHTML();
    }

    // loadHTML() is an instance method; calling it statically is deprecated
    // in PHP 7 and an Error in PHP 8.
    $dom = new DOMDocument();
    if ($dom->loadHTML($html)) {
        $finder = new DOMXPath($dom);
        // query() returns false for an invalid expression.
        $matches = $finder->query($xpath);
        if ($matches !== false) {
            foreach ($matches as $node) {
                $wrapper->appendChild($extract->importNode($node, TRUE));
            }
        }
    }
    return $extract->saveHTML();
}
<?php

// Case 1: a bare upper-case <HTML> element. Dump the result of
// tidy_diagnose(), then print the error buffer with every "\r" removed
// (presumably to normalise line endings - confirm).
$a = tidy_parse_string('<HTML></HTML>');
$diagnosed = tidy_diagnose($a);
var_dump($diagnosed);
$report = tidy_get_error_buffer($a);
print str_replace("\r", "", $report);

// Case 2: a complete HTML 3.2 document with doctype, title and body.
// The markup contains nothing to interpolate, so a nowdoc yields the same string.
$html = <<<'HTML'
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
<html>
<head><title>foo</title></head>
<body><p>hello</p></body>
</html>
HTML;
$a = tidy_parse_string($html);
$diagnosed = tidy_diagnose($a);
var_dump($diagnosed);
// This time the error buffer is printed as-is.
$report = tidy_get_error_buffer($a);
print $report;