Example #1
0
/**
 * Describe a recorded request from its DOM node without replaying it.
 *
 * Reads the <get>, <post>, <xpath> and <data> children plus the "src",
 * "referer" and "method" attributes of the node. When the tidy extension is
 * loaded the recorded data is parsed and diagnosed; the error count and the
 * error messages are only added to the result if $use_tidy is true. Without
 * the extension, 'ref_error_msg' carries a translated notice instead.
 *
 * @param $url node of the recorded request (accessed through getElementsByTagName() and getAttribute())
 * @param bool $use_tidy whether to report the Tidy diagnostics of the recorded data
 * @return array keys: data, [ref_error_count], ref_error_msg, url, xpath, method, post, get, referer
 */
function get_url($url, $use_tidy = TRUE)
{
    global $cookies;
    // Result is not used here; the call is kept as the original made it.
    TikiLib::lib('smarty');

    $getParams = get_from_dom($url->getElementsByTagName('get')->item(0));
    $postParams = get_from_dom($url->getElementsByTagName('post')->item(0));
    $xpathExpr = $url->getElementsByTagName('xpath')->item(0)->textContent;
    $recorded = $url->getElementsByTagName('data')->item(0)->textContent;

    // The raw recorded text is returned, not the Tidy document built from it.
    $description = array('data' => $recorded);

    if (!extension_loaded("tidy")) {
        $description['ref_error_msg'] = tra("Tidy Extension not present");
    } else {
        $document = tidy_parse_string($recorded, array(), 'utf8');
        tidy_diagnose($document);
        if ($use_tidy) {
            $description['ref_error_count'] = tidy_error_count($document);
            $description['ref_error_msg'] = tidy_get_error_buffer($document);
        }
    }

    // None of these keys exist yet, so the union simply appends them in order.
    return $description + array(
        'url' => $url->getAttribute("src"),
        'xpath' => $xpathExpr,
        'method' => $url->getAttribute("method"),
        'post' => $postParams,
        'get' => $getParams,
        'referer' => $url->getAttribute("referer"),
    );
}
Example #2
0
File: Page.php Project: broozer/psa
 /**
  * Flush the buffered page when the object is destroyed.
  *
  * Appends the closing body/html tags to the static output buffer and echoes
  * it. In debug mode the Tidy messages for the page are printed first, followed
  * by an HTML-escaped, line-numbered dump of the markup (blank lines skipped).
  * The buffer is emptied afterwards. If the html or head flag was never set, the
  * PageException message is echoed instead and the buffer is left untouched.
  */
 public function __destruct()
 {
     try {
         if (!self::$html_set || !self::$head_set) {
             throw new PageException("<b>HTML class exception.</b><br />Either &lt;html&gt; or &lt;head&gt; or &lt;body&gt; is not set.</b><br />\n\t\t\t\t\tAll these tags need to be used in order to generate valid html forms.");
         }
         self::$output .= "</body>\n</html>";
         if (self::$debug) {
             echo '<b>Tidy messages</b><br />';
             $tidy = tidy_parse_string(self::$output);
             echo nl2br(htmlentities(tidy_get_error_buffer($tidy)));
             echo '<hr />';
             $linedump = explode("\n", nl2br(htmlentities(str_replace("<br />", "\n", self::$output))));
             foreach ($linedump as $i => $line) {
                 // nl2br() inserts "<br />", so that form has to be stripped
                 // (as well as "<br>") for the blank-line test to ever match.
                 if (trim(str_replace(array("<br />", "<br>"), "", $line)) == '') {
                     continue;
                 }
                 $linenr = $i + 1;
                 // Pad according to the number actually displayed, and never pass
                 // a negative count to str_repeat() once it exceeds four digits.
                 $padding = max(0, 4 - strlen((string) $linenr));
                 echo str_repeat("&nbsp;", $padding) . $linenr . ' : ' . $line;
             }
             echo '<hr />';
         } else {
             echo self::$output;
         }
         self::$output = '';
     } catch (PageException $e) {
         echo $e->getMessage();
     }
 }
Example #3
0
 /**
  * Forward Tidy's error buffer, when it is not empty, to the eZ debug log
  * as a warning under the "extension-eztidy" setting.
  */
 private function reportWarning()
 {
     $buffer = tidy_get_error_buffer($this->tidy);
     if (empty($buffer)) {
         // Nothing was reported by Tidy.
         return;
     }
     eZDebugSetting::writeWarning("extension-eztidy", "{$buffer}", 'eZTidy::tidyCleaner()');
 }
Example #4
0
    /**
     * Validate an attribute value against a named validation type.
     *
     * Unless noted otherwise the outcome is recorded with
     * setValid($attribute, ...) and then returned. Supported types:
     *  - boolean: the value must be a real bool
     *  - int, decimal, numeric: is_numeric()
     *  - string, text, serialized, xml: non-empty after trim(); an empty value
     *    is only invalid when the attribute is flagged 'required' in _metaData
     *  - xhtml: the value is wrapped in an XHTML document and parsed by Tidy;
     *    invalid when the Tidy status is above 1 and a message is available,
     *    a status of 1 only emits a warning message. When Tidy is not installed
     *    true is returned without recording anything
     *  - datetime: always true, nothing is recorded
     *  - date: YYYY-M-D shape
     *  - email: address pattern
     *  - url: must start with http:// or ftp://
     *  - product_code: letters, digits and "-" only
     *  - any other type: valid
     *
     * @param string $validation_type
     * @param string $attribute
     * @param string $value
     * @return boolean
     */
    public function validation($validation_type, $attribute, $value)
    {
        switch ($validation_type) {
            /* please don't use boolean, it's not a good idea in PHP :) */
            case 'boolean':
                $is_valid = is_bool($value);
                break;
            case 'int':
            case 'decimal':
            case 'numeric':
                $is_valid = is_numeric($value);
                break;
            case 'string':
            case 'text':
            case 'serialized':
            case 'xml':
                // An empty value fails only for required attributes. Previously an
                // empty, non-required value fell through into the 'xhtml' case.
                $is_valid = trim($value) != '' || empty($this->_metaData[$attribute]['required']);
                break;
            case 'xhtml':
                //don't do any validation if Tidy is not installed
                if (!function_exists('tidy_get_status')) {
                    return true;
                }
                $tidy_content = '
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head><title>test</title></head><body>' . $value . '</body></html>';
                // Specify configuration
                $config = array('show-warnings' => true, 'doctype' => 'transitional', 'indent' => true, 'output-xhtml' => true, 'wrap' => 200);
                // Tidy
                $tidy = new tidy();
                $tidy->parseString($tidy_content, $config, 'utf8');
                // get result
                $result_status = tidy_get_status($tidy);
                $result_message = tidy_get_error_buffer($tidy);
                // Must start out defined: it is only filled for status > 1.
                $error = '';
                if ($result_status > 1) {
                    $error = $result_message;
                } elseif ($result_status > 0) {
                    msg("Tidy warning: {$result_message}", "error", 2);
                }
                $is_valid = $error == '';
                if (!$is_valid) {
                    msg($error, 'error');
                }
                break;
            case 'datetime':
                // Accepted as-is; the validity flag is deliberately left untouched.
                return true;
            case 'date':
                // ISO date
                $is_valid = (bool) preg_match("/^\\d{4}-\\d{1,2}-\\d{1,2}\$/", $value);
                break;
            case 'email':
                $is_valid = (bool) preg_match('/^([*+!.&#$|\'\\%\\/0-9a-z^_`{}=?~:-]+)@(([0-9a-z-]+\\.)+[0-9a-z]{2,32})$/i', $value);
                if (!$is_valid) {
                    msg(I18N_ERROR_ENTER_VALID_EMAIL, 'error');
                }
                break;
            case 'url':
                $is_valid = (bool) preg_match('/^(http:\\/\\/|ftp:\\/\\/)/i', $value);
                if (!$is_valid) {
                    msg(I18N_ERROR_WRONG_URL, "error", 2);
                }
                break;
            case 'product_code':
                /*
                 * be aware of "_", in SQL LIKE (escape it, or don't use it)
                 */
                // Test the submitted value; the undefined $pc used before made
                // every product code pass.
                $is_valid = (bool) preg_match('/^[0-9a-zA-Z-]*$/', $value);
                if (!$is_valid) {
                    msg(I18N_ERROR_INVALID_PRODUCT_CODE, 'error', 2);
                }
                break;
            default:
                $is_valid = true;
                break;
        }
        $this->setValid($attribute, $is_valid);
        return $is_valid;
    }
 /**
  * Validate the content with Tidy and replace it with the repaired version.
  *
  * For example:
  *     $repairedContent =
  *         TidyValidator::create()->
  *         setContent('<b>blablabla')->
  *         validateContent()->
  *         getContent();
  *
  * Or just:
  *     $repairedContent =
  *         TidyValidator::create()->
  *         validateContent('<b>blablabla')->
  *         getContent();
  *
  * The content is wrapped between getHeader() and a closing body/html pair
  * before parsing. Afterwards errorCount, warningCount, messages and content
  * are updated. Line numbers in the messages are shifted by headerLines so they
  * refer to the content rather than to the wrapped document. When the repaired
  * content differs from the input (ignoring whitespace), a review notice is
  * added to the messages.
  *
  * @param $content content to validate; when empty the current content is used
  * @return TidyValidator
  **/
 public function validateContent($content = null)
 {
     static $symbols = array('…' => '&hellip;', '™' => '&trade;', '©' => '&copy;', '№' => '&#8470;', '—' => '&mdash;', '–' => '&mdash;', '«' => '&laquo;', '»' => '&raquo;', '„' => '&bdquo;', '“' => '&ldquo;', '•' => '&bull;', '®' => '&reg;', '¼' => '&frac14;', '½' => '&frac12;', '¾' => '&frac34;', '±' => '&plusmn;');
     if ($content) {
         $this->setContent($content);
     } elseif (!$this->getContent()) {
         return $this;
     }
     $tidy = tidy_parse_string($this->getHeader() . "\n" . $this->getContent() . "\n</body></html>", $this->getConfig(), $this->getEncoding());
     $this->errorCount = tidy_error_count($tidy);
     $this->warningCount = tidy_warning_count($tidy);
     $rawMessages = tidy_get_error_buffer($tidy);
     $messages = array();
     if (!empty($rawMessages)) {
         foreach (explode("\n", htmlspecialchars($rawMessages)) as $string) {
             if ($string === '') {
                 continue;
             }
             $parts = explode(' ', $string, 4);
             if (count($parts) === 4 && is_numeric($parts[1])) {
                 // "line N column M - ...": shift N back by the header length.
                 $messages[] = 'line ' . ($parts[1] - $this->headerLines) . ' column ' . $parts[3];
             } else {
                 // Not a positional message; keep it verbatim instead of
                 // doing arithmetic on text that is not a line number.
                 $messages[] = $string;
             }
         }
     }
     $out = $messages ? implode("\n", $messages) : null;
     $tidy->cleanRepair();
     $outContent = array();
     preg_match_all('/<body>(.*)<\\/body>/s', tidy_get_output($tidy), $outContent);
     Assert::isTrue(isset($outContent[1][0]));
     $outContent[1][0] = strtr($outContent[1][0], $symbols);
     // Compare both versions with all whitespace removed. The replacement is an
     // empty string: passing null there is deprecated since PHP 8.1.
     $crcBefore = crc32(preg_replace('/[\\t\\n\\r\\0 ]/', '', $this->getContent()));
     $crcAfter = crc32(preg_replace('/[\\t\\n\\r\\0 ]/', '', $outContent[1][0]));
     if ($crcBefore != $crcAfter) {
         if ($this->countTags('<[\\t ]*p[\\t ]*>', $this->getContent()) != $this->countTags('<[\\t ]*p[\\t ]*>', $outContent[1][0]) || $this->countTags('<[\\t ]*\\/[\\t ]*p[\\t ]*>', $this->getContent()) != $this->countTags('<[\\t ]*\\/[\\t ]*p[\\t ]*>', $outContent[1][0])) {
             $out = ($out == null ? null : $out . "\n\n") . 'Paragraphs have been changed, please review content';
         } else {
             if (!$out) {
                 $out = 'Content has been changed, please review';
             }
         }
     }
     $this->messages = $out;
     $this->content = $outContent[1][0];
     return $this;
 }
Example #6
0
/**
 * Replay a recorded request and diff the live response against the recording.
 *
 * The request is sent with the "http" extension when it is loaded, otherwise
 * with cURL; cookies received are merged into the global $cookies so later
 * calls reuse them. Both the recorded data and the live response are then
 * normalised (Tidy when available, HTML Purifier otherwise), optionally reduced
 * to the nodes matching the recorded XPath, and passed to diff2().
 *
 * @param $url node of the recorded request (accessed through getElementsByTagName() and getAttribute())
 * @param bool $use_tidy when the tidy extension is loaded, also report error counts and messages
 * @return array keys: [ref_error_count], [ref_error_msg], [replay_error_count], [replay_error_msg], html, url, method, and post for POST requests
 */
function verif_url($url, $use_tidy = TRUE)
{
    global $cookies;
    static $purifier;
    static $loaded = false;
    // $smarty and $get are not used below; the calls are kept because their
    // side effects, if any, are not visible from this function.
    $smarty = TikiLib::lib('smarty');
    $result = array();
    $get = get_from_dom($url->getElementsByTagName('get')->item(0));
    $post = get_from_dom($url->getElementsByTagName('post')->item(0));
    $xpath = $url->getElementsByTagName('xpath')->item(0)->textContent;
    $data = $url->getElementsByTagName('data')->item(0)->textContent;
    $urlstr = $url->getAttribute('src');
    $method = strtolower($url->getAttribute('method'));
    // The shared cookie jar must be an array before it is iterated or merged.
    if (!is_array($cookies)) {
        $cookies = array();
    }
    // Stays empty when no transport extension is available.
    $buffer = '';
    if (extension_loaded('http')) {
        $options = array();
        $options['timeout'] = 2;
        $options['connecttimeout'] = 2;
        $options['url'] = $urlstr;
        $options['referer'] = $url->getAttribute('referer');
        $options['redirect'] = 0;
        $options['cookies'] = $cookies;
        $options['cookiestore'] = tempnam('/tmp/', 'tiki-tests');
        // Close the session to avoid timeout
        session_write_close();
        switch ($method) {
            case 'get':
                $buffer = http_get($urlstr, $options, $info);
                break;
            case 'post':
                $buffer = http_post_fields($urlstr, $post, NULL, $options, $info);
                break;
        }
        $headers = http_parse_headers($buffer);
        if (isset($headers['Set-Cookie'])) {
            // A single Set-Cookie header may come back as a plain string.
            foreach ((array) $headers['Set-Cookie'] as $c) {
                TikiLib::parse_str($c, $cookies);
            }
        }
        $buffer = http_parse_message($buffer)->body;
    } elseif (extension_loaded('curl')) {
        $curl = curl_init();
        curl_setopt($curl, CURLOPT_URL, $urlstr);
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 2);
        curl_setopt($curl, CURLOPT_TIMEOUT, 2);
        // NOTE(review): certificate verification is disabled; acceptable only
        // if this is used against test installations - confirm.
        curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($curl, CURLOPT_HEADER, true);
        curl_setopt($curl, CURLOPT_REFERER, $url->getAttribute('referer'));
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, false);
        curl_setopt($curl, CURLOPT_USERAGENT, 'TikiTest');
        // We deal with the cookies
        $cookies_string = '';
        foreach ($cookies as $c => $v) {
            $cookies_string .= "{$c}={$v}; path=/;";
        }
        curl_setopt($curl, CURLOPT_COOKIE, $cookies_string);
        switch ($method) {
            case 'get':
                curl_setopt($curl, CURLOPT_HTTPGET, true);
                break;
            case 'post':
                curl_setopt($curl, CURLOPT_POST, true);
                // URL-encode names and values, as the http-extension branch does
                // by handing the array to http_post_fields().
                $post_string = is_array($post) ? http_build_query($post, '', '&') : '';
                curl_setopt($curl, CURLOPT_POSTFIELDS, $post_string);
                break;
        }
        // Close the session to avoid timeout
        session_write_close();
        $http_response = curl_exec($curl);
        if ($http_response === false) {
            // Transfer failed (timeout, refused connection, ...): nothing to split.
            $header = '';
            $body = '';
        } else {
            $header_size = curl_getinfo($curl, CURLINFO_HEADER_SIZE);
            $header = substr($http_response, 0, $header_size);
            $body = substr($http_response, $header_size);
        }
        preg_match_all('|Set-Cookie: (.*);|U', $header, $cookies_array);
        $cookies_tmp = '';
        foreach ($cookies_array[1] as $c) {
            $cookies_tmp .= "&{$c}";
        }
        $cookies_titi = array();
        TikiLib::parse_str($cookies_tmp, $cookies_titi);
        if (!is_array($cookies_titi)) {
            $cookies_titi = array();
        }
        $cookies = array_merge($cookies, $cookies_titi);
        $buffer = $body;
        curl_close($curl);
    }
    if (extension_loaded('tidy')) {
        $data = tidy_parse_string($data, array(), 'utf8');
        $buffer = tidy_parse_string($buffer, array(), 'utf8');
        if ($use_tidy) {
            tidy_diagnose($data);
            $result['ref_error_count'] = tidy_error_count($data);
            $result['ref_error_msg'] = tidy_get_error_buffer($data);
            tidy_diagnose($buffer);
            $result['replay_error_count'] = tidy_error_count($buffer);
            $result['replay_error_msg'] = tidy_get_error_buffer($buffer);
        }
    } else {
        // The purifier is built once and reused by later calls.
        if (!$loaded) {
            require_once 'lib/htmlpurifier_tiki/HTMLPurifier.tiki.php';
            $config = getHTMLPurifierTikiConfig();
            $purifier = new HTMLPurifier($config);
            $loaded = true;
        }
        if ($purifier) {
            $data = '<html><body>' . $purifier->purify($data) . '</body></html>';
            $buffer = '<html><body>' . $purifier->purify($buffer) . '</body></html>';
        }
        $result['ref_error_msg'] = tra('The Tidy extension is not present');
        $result['replay_error_msg'] = tra('The Tidy extension is not present');
    }
    // If we have a XPath then we extract the new DOM and print it in HTML
    if (trim($xpath) != '') {
        $data = verif_url_extract_xpath($data, $xpath);
        $buffer = verif_url_extract_xpath($buffer, $xpath);
    }
    $tmp = diff2($data, $buffer, "htmldiff");
    if (trim($xpath) != '') {
        $result['html'] = preg_replace(array("/<html>/", "/<\\/html>/"), array("<div style='overflow: auto; width:500px; text-align: center'> ", "</div>"), $tmp);
    } else {
        $result['html'] = preg_replace(array("/<html.*<body/U", "/<\\/body><\\/html>/U"), array("<div style='overflow: auto; width:500px; text-align: center' ", "</div>"), $tmp);
    }
    $result['url'] = $urlstr;
    $result['method'] = $url->getAttribute('method');
    if (strtolower($result['method']) == 'post') {
        $result['post'] = $post;
    }
    return $result;
}

/**
 * Reduce an HTML document to the nodes matched by an XPath expression.
 *
 * The matches are copied into a fresh document, inside two nested <html>
 * elements, and that document is returned serialised as HTML.
 *
 * @param mixed $html markup to search; a Tidy document is converted to its string output
 * @param string $xpath expression selecting the nodes to keep
 * @return string
 */
function verif_url_extract_xpath($html, $xpath)
{
    $html = (string) $html;
    // loadHTML() is an instance method; calling it statically is an Error on PHP 8.
    $source = new DOMDocument();
    if ($html !== '') {
        $source->loadHTML($html);
    }
    $finder = new DOMXPath($source);
    $matches = $finder->query($xpath);

    $extract = new DOMDocument('1.0');
    $root = $extract->appendChild($extract->createElement('html'));
    // NOTE(review): the inner wrapper is another <html>, not <body>. The caller's
    // regexes rewrite <html>/</html> into <div>s, so this is kept - confirm intent.
    $container = $root->appendChild($extract->createElement('html'));
    if ($matches !== false) {
        foreach ($matches as $node) {
            $container->appendChild($extract->importNode($node, TRUE));
        }
    }
    return $extract->saveHTML();
}
Example #7
0
<?php

/*
 * cleanhtml.php
 *
 * A simple script to clean and repair HTML,XHTML,PHP,ASP,etc. documents
 * if no file is provided, it reads from standard input.
 *
 * NOTE: Works only with tidy for PHP 4.3.x, for tidy in PHP 5 see cleanhtml5.php
 *
 * By: John Coggeshall <*****@*****.**>
 *
 * Usage: php cleanhtml.php [filename]
 *
 */
// Pick the input: the file named on the command line, or standard input.
// (tidy for PHP 4.3 keeps a single implicit document, hence no handles.)
if (isset($_SERVER['argv'][1])) {
    tidy_parse_file($_SERVER['argv'][1]);
} else {
    $data = file_get_contents("php://stdin");
    tidy_parse_string($data);
}

// Repair the implicit document in place.
tidy_clean_repair();

// Report anything Tidy complained about before printing the result.
$has_problems = tidy_warning_count() || tidy_error_count();
if ($has_problems) {
    echo "\n\nThe following errors or warnings occurred:\n", tidy_get_error_buffer(), "\n";
}

echo tidy_get_output();
Example #8
0
 /**
  * Use HTML Tidy to clean up $text in place.
  *
  * Tidy is skipped (and true returned) when $config['HTML_Tidy'] is empty or
  * 'off', unless $ignore_config is set. The text is only replaced when Tidy
  * did not report a fatal status.
  *
  * @param string $text The html content to be checked. Passed by reference
  * @param bool $ignore_config Run Tidy regardless of $config['HTML_Tidy']
  * @return bool true when the text was cleaned or Tidy is disabled by
  *              configuration; false when the tidy functions are missing or
  *              Tidy reported a fatal error (status 2)
  */
 static function tidyFix(&$text, $ignore_config = false)
 {
     global $config;
     if (!$ignore_config) {
         if (empty($config['HTML_Tidy']) || $config['HTML_Tidy'] == 'off') {
             return true;
         }
     }
     if (!function_exists('tidy_parse_string')) {
         return false;
     }
     $options = array(
         //keeps tidy from wrapping... want the least amount of space changing as possible
         'wrap' => 0,
         //omit, auto, strict, transitional, user
         'doctype' => 'omit',
         //drop empty paragraphs
         'drop-empty-paras' => true,
         //need this so that <br> will be <br/> .. etc
         'output-xhtml' => true,
         'show-body-only' => true,
         'hide-comments' => false,
     );
     //'anchor-as-name' is left at its default (true) because the option is not always available

     //
     //	php4: options are applied globally and there is one implicit document
     //
     if (function_exists('tidy_setopt')) {
         $options['char-encoding'] = 'utf8';
         gp_edit::tidyOptions($options);
         tidy_parse_string($text);
         tidy_clean_repair();
         // 2 is magic number for fatal error
         // http://www.php.net/manual/en/function.tidy-get-status.php
         if (tidy_get_status() === 2) {
             return false;
         }
         $text = tidy_get_output();
         return true;
     }

     //
     //	php5
     //
     $tidy = tidy_parse_string($text, $options, 'utf8');
     tidy_clean_repair($tidy);
     // 2 is magic number for fatal error
     // http://www.php.net/manual/en/function.tidy-get-status.php
     if (tidy_get_status($tidy) === 2) {
         return false;
     }
     $text = tidy_get_output($tidy);
     return true;
 }
Example #9
0
 /**
  * Parse the markup with Tidy and return its error buffer split into lines.
  *
  * @param  string $markup
  *
  * @return array one entry per line of the Tidy error buffer
  */
 public function executeTidy($markup)
 {
     $document = tidy_parse_string($markup);
     $report = tidy_get_error_buffer($document);

     return explode("\n", $report);
 }
Example #10
0
<?php

// Load the document, let Tidy repair it, then print what Tidy reported.
$document = tidy_parse_file("intro2_ex1.html");
tidy_clean_repair($document);
print tidy_get_error_buffer($document);
Example #11
0
<?php

/* First document: read from a file, repaired, only its error buffer is kept */
$tidy1 = tidy_parse_file("myfile.html");
tidy_clean_repair($tidy1);
$errors = tidy_get_error_buffer($tidy1);

/* Second document: read from a string, repaired, only its output is kept */
$tidy2 = tidy_parse_string("<HTML><B>Hello!</B>");
tidy_clean_repair($tidy2);
$output = tidy_get_output($tidy2);