/**
 * Describe a recorded request without replaying it.
 *
 * Reads the <get>, <post>, <xpath> and <data> children and the src, referer
 * and method attributes of the given element and returns them as an array.
 * When the tidy extension is loaded the recorded data is parsed and
 * diagnosed; the error count and error buffer are only added to the result
 * if $use_tidy is true.
 *
 * @param $url element exposing getElementsByTagName() and getAttribute()
 * @param bool $use_tidy whether tidy diagnostics are added to the result
 * @return array keys: data, ref_error_count / ref_error_msg (conditionally),
 *               url, xpath, method, post, get, referer
 */
function get_url($url, $use_tidy = TRUE)
{
    global $cookies;

    $smarty = TikiLib::lib('smarty');

    $getParams = get_from_dom($url->getElementsByTagName('get')->item(0));
    $postParams = get_from_dom($url->getElementsByTagName('post')->item(0));
    $xpathExpr = $url->getElementsByTagName('xpath')->item(0)->textContent;
    $recorded = $url->getElementsByTagName('data')->item(0)->textContent;
    $source = $url->getAttribute("src");
    $refererUrl = $url->getAttribute("referer");

    $result = array('data' => $recorded);

    if (!extension_loaded("tidy")) {
        $result['ref_error_msg'] = tra("Tidy Extension not present");
    } else {
        $parsed = tidy_parse_string($recorded, array(), 'utf8');
        tidy_diagnose($parsed);
        if ($use_tidy) {
            $result['ref_error_count'] = tidy_error_count($parsed);
            $result['ref_error_msg'] = tidy_get_error_buffer($parsed);
        }
    }

    // None of these keys exist yet, so the union simply appends them in order.
    $result += array(
        'url' => $source,
        'xpath' => $xpathExpr,
        'method' => $url->getAttribute("method"),
        'post' => $postParams,
        'get' => $getParams,
        'referer' => $refererUrl,
    );

    return $result;
}
/**
 * Emit the buffered page when the object is destroyed.
 *
 * Closes the document with </body></html> and echoes the buffer. In debug
 * mode the buffer is not sent as markup; instead tidy's messages are printed,
 * followed by an escaped, line-numbered dump of the generated source.
 * The buffer is emptied afterwards in both modes.
 *
 * If the html or head flag is not set, a PageException is raised and its
 * message is echoed instead of the page.
 * NOTE(review): the message mentions <body> too, but no body flag is checked
 * here, and the raw tag names in the message are echoed unescaped — confirm.
 */
public function __destruct()
{
    try {
        if (!self::$html_set || !self::$head_set) {
            throw new PageException("<b>HTML class exception.</b><br />Either <html> or <head> or <body> is not set.</b><br />\n\t\t\t\t\tAll these tags need to be used in order to generate valid html forms.");
        }

        self::$output .= "</body>\n</html>";

        if (!self::$debug) {
            echo self::$output;
            self::$output = '';
            return;
        }

        echo '<b>Tidy messages</b><br />';
        $tidy = tidy_parse_string(self::$output);
        echo nl2br(htmlentities(tidy_get_error_buffer($tidy)));
        echo '<hr />';

        $linedump = explode("\n", nl2br(htmlentities(str_replace("<br />", "\n", self::$output))));
        foreach ($linedump as $index => $line) {
            // nl2br() emits "<br />" by default, so both spellings must be
            // stripped before deciding that a line is blank.
            if (trim(str_replace(array('<br />', '<br>'), '', $line)) === '') {
                continue;
            }

            // Pad the number that is actually printed (1-based); str_pad()
            // never receives a negative width, whatever the line count.
            echo str_pad((string) ($index + 1), 4, ' ', STR_PAD_LEFT) . ' : ' . $line;
        }
        echo '<hr />';
        self::$output = '';
    } catch (PageException $e) {
        echo $e->getMessage();
    }
}
/**
 * Forward tidy's error buffer to the debug log as a warning.
 *
 * Nothing is written when the buffer is empty.
 */
private function reportWarning()
{
    $buffer = tidy_get_error_buffer($this->tidy);

    if (empty($buffer)) {
        return;
    }

    eZDebugSetting::writeWarning("extension-eztidy", (string) $buffer, 'eZTidy::tidyCleaner()');
}
/**
 * Validate an attribute value against a named validation type.
 *
 * The outcome is recorded with setValid() and returned. Two paths return
 * true without recording anything: 'datetime', and 'xhtml' when the tidy
 * functions are not available. Unknown types are accepted as valid.
 *
 * @param string $validation_type one of boolean, int, decimal, numeric,
 *                                string, text, serialized, xml, xhtml,
 *                                datetime, date, email, url, product_code
 * @param string $attribute attribute name passed to setValid() and used as
 *                          the key into $this->_metaData
 * @param string $value value to check
 * @return boolean true when the value is accepted
 */
public function validation($validation_type, $attribute, $value)
{
    switch ($validation_type) {
        /* please don't use boolean, it's not a good idea in PHP :) */
        case 'boolean':
            $is_valid = is_bool($value);
            break;

        case 'int':
        case 'decimal':
        case 'numeric':
            $is_valid = is_numeric($value);
            break;

        case 'string':
        case 'text':
        case 'serialized':
        case 'xml':
            // An empty value only fails when the attribute is flagged as
            // required; previously the optional-and-empty case fell through
            // into the xhtml check below.
            $is_valid = trim($value) != '' || empty($this->_metaData[$attribute]['required']);
            break;

        case 'xhtml':
            //don't do any validation if Tidy is not installed
            if (!function_exists('tidy_get_status')) {
                return true;
            }

            // Wrap the fragment in a minimal document so tidy can parse it.
            $tidy_content = ' <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head><title>test</title></head><body>' . $value . '</body></html>';

            $config = array(
                'show-warnings' => true,
                'doctype' => 'transitional',
                'indent' => true,
                'output-xhtml' => true,
                'wrap' => 200,
            );

            $tidy = new tidy();
            $tidy->parseString($tidy_content, $config, 'utf8');

            $result_status = tidy_get_status($tidy);
            $result_message = tidy_get_error_buffer($tidy);

            // Status above 1 is treated as an error, status 1 as a warning
            // that is reported but does not invalidate the value.
            $error = '';
            if ($result_status > 1) {
                $error = $result_message;
            } elseif ($result_status > 0) {
                msg("Tidy warning: {$result_message}", "error", 2);
            }

            $is_valid = ($error == '');
            if (!$is_valid) {
                msg($error, 'error');
            }
            break;

        case 'datetime':
            // No check is performed and the recorded state is left untouched.
            return true;

        case 'date':
            // ISO date
            $is_valid = preg_match('/^\d{4}-\d{1,2}-\d{1,2}$/', $value) === 1;
            break;

        case 'email':
            $regex = '/^([*+!.&#$|\'\\%\\/0-9a-z^_`{}=?~:-]+)@(([0-9a-z-]+\\.)+[0-9a-z]{2,32})$/i';
            $is_valid = preg_match($regex, $value) === 1;
            if (!$is_valid) {
                msg(I18N_ERROR_ENTER_VALID_EMAIL, 'error');
            }
            break;

        case 'url':
            // NOTE(review): only http:// and ftp:// prefixes are accepted,
            // so https:// values are rejected — confirm this is intended.
            $regex = '/^(http:\\/\\/|ftp:\\/\\/)/i';
            $is_valid = preg_match($regex, $value) === 1;
            if (!$is_valid) {
                msg(I18N_ERROR_WRONG_URL, "error", 2);
            }
            break;

        case 'product_code':
            /*
             * be aware of "_", in SQL LIKE (escape it, or don't use it)
             */
            // The value itself is matched; the previous code tested an
            // undefined variable and therefore accepted every input.
            $is_valid = preg_match('/^[0-9a-zA-Z-]*$/', $value) === 1;
            if (!$is_valid) {
                msg(I18N_ERROR_INVALID_PRODUCT_CODE, 'error', 2);
            }
            break;

        default:
            $is_valid = true;
            break;
    }

    $this->setValid($attribute, $is_valid);

    return $is_valid;
}
/**
 * Do the content validation and repair it.
 *
 * For example:
 *   $repairedContent =
 *       TidyValidator::create()->
 *       setContent('<b>blablabla')->
 *       validateContent()->
 *       getContent();
 *
 * Or just:
 *   $repairedContent =
 *       TidyValidator::create()->
 *       validateContent('<b>blablabla')->
 *       getContent();
 *
 * The content is wrapped between getHeader() and a closing </body></html>,
 * parsed by tidy, repaired, and the text between <body> and </body> becomes
 * the new content. Error/warning counts and a message list (with line
 * numbers shifted by $this->headerLines) are stored on the object.
 * If nothing is passed and no content is set, the object is returned untouched.
 *
 * @param $content content to validate
 * @return TidyValidator
**/
public function validateContent($content = null)
{
    static $symbols = array('…' => '…', '™' => '™', '©' => '©', '№' => '№', '—' => '—', '–' => '—', '«' => '«', '»' => '»', '„' => '„', '“' => '“', '•' => '•', '®' => '®', '¼' => '¼', '½' => '½', '¾' => '¾', '±' => '±');

    if ($content) {
        $this->setContent($content);
    } elseif (!$this->getContent()) {
        return $this;
    }

    $tidy = tidy_parse_string(
        $this->getHeader() . "\n" . $this->getContent() . "\n</body></html>",
        $this->getConfig(),
        $this->getEncoding()
    );

    $this->errorCount = tidy_error_count($tidy);
    $this->warningCount = tidy_warning_count($tidy);

    $rawMessages = tidy_get_error_buffer($tidy);
    $out = null;

    if (!empty($rawMessages)) {
        $messageLines = array();

        foreach (explode("\n", htmlspecialchars($rawMessages)) as $string) {
            $parts = explode(' ', $string, 4);

            if (count($parts) === 4 && $parts[0] === 'line' && is_numeric($parts[1])) {
                // "line N column M - ..." : shift N so it refers to the
                // caller's content rather than the wrapped document.
                $messageLines[] = 'line ' . ($parts[1] - $this->headerLines) . ' column ' . $parts[3];
            } elseif (trim($string) !== '') {
                // Any other diagnostic is kept verbatim instead of being
                // forced through the line/column format.
                $messageLines[] = $string;
            }
        }

        if ($messageLines) {
            $out = implode("\n", $messageLines);
        }
    }

    $tidy->cleanRepair();

    $outContent = array();
    preg_match_all('/<body>(.*)<\\/body>/s', (string) $tidy, $outContent);

    Assert::isTrue(isset($outContent[1][0]));

    $outContent[1][0] = strtr($outContent[1][0], $symbols);

    // Compare checksums with whitespace removed, so that pure re-indentation
    // by tidy is not reported as a content change.
    $crcBefore = crc32(preg_replace('/[\\t\\n\\r\\0 ]/', '', $this->getContent()));
    $crcAfter = crc32(preg_replace('/[\\t\\n\\r\\0 ]/', '', $outContent[1][0]));

    if ($crcBefore != $crcAfter) {
        $paragraphsChanged =
            $this->countTags('<[\\t ]*p[\\t ]*>', $this->getContent())
                != $this->countTags('<[\\t ]*p[\\t ]*>', $outContent[1][0])
            || $this->countTags('<[\\t ]*\\/[\\t ]*p[\\t ]*>', $this->getContent())
                != $this->countTags('<[\\t ]*\\/[\\t ]*p[\\t ]*>', $outContent[1][0]);

        if ($paragraphsChanged) {
            $out = ($out == null ? null : $out . "\n\n") . 'Paragraphs have been changed, please review content';
        } elseif (!$out) {
            $out = 'Content has been changed, please review';
        }
    }

    $this->messages = $out;
    $this->content = $outContent[1][0];

    return $this;
}
/**
 * Replay a recorded request and diff the live response against the recording.
 *
 * The request described by $url (src, referer, method attributes and the
 * <post> child) is sent with the http extension if loaded, otherwise with
 * curl. Cookies are carried between calls through the global $cookies.
 * Both the recorded <data> and the live response are passed through tidy
 * (or HTML Purifier when tidy is missing), optionally narrowed to the nodes
 * matching the recorded <xpath>, and compared with diff2().
 *
 * @param $url element exposing getElementsByTagName() and getAttribute()
 * @param bool $use_tidy whether tidy diagnostics are added to the result
 * @return array keys: ref_error_* / replay_error_* (conditionally), html,
 *               url, method, and post for POST requests
 */
function verif_url($url, $use_tidy = TRUE)
{
    global $cookies;
    static $purifier;
    static $loaded = false;

    $smarty = TikiLib::lib('smarty');
    $result = array();

    // NOTE(review): $get is not used below; the call is kept in case
    // get_from_dom() has side effects — confirm and remove.
    $get = get_from_dom($url->getElementsByTagName('get')->item(0));
    $post = get_from_dom($url->getElementsByTagName('post')->item(0));
    $xpath = $url->getElementsByTagName('xpath')->item(0)->textContent;
    $data = $url->getElementsByTagName('data')->item(0)->textContent;
    $urlstr = $url->getAttribute('src');
    $method = strtolower($url->getAttribute('method'));

    // Normalise up front: the cookie jar is iterated and merged below.
    if (!is_array($cookies)) {
        $cookies = array();
    }

    // Stays empty when neither transport is available or the method is unknown.
    $buffer = '';

    if (extension_loaded('http')) {
        $options = array(
            'timeout' => 2,
            'connecttimeout' => 2,
            'url' => $urlstr,
            'referer' => $url->getAttribute('referer'),
            'redirect' => 0,
            'cookies' => $cookies,
            // NOTE(review): this temporary file is never removed.
            'cookiestore' => tempnam('/tmp/', 'tiki-tests'),
        );

        // Close the session to avoid timeout
        session_write_close();

        switch ($method) {
            case 'get':
                $buffer = http_get($urlstr, $options, $info);
                break;
            case 'post':
                $buffer = http_post_fields($urlstr, $post, NULL, $options, $info);
                break;
        }

        $headers = http_parse_headers($buffer);
        if (isset($headers['Set-Cookie'])) {
            foreach ($headers['Set-Cookie'] as $c) {
                TikiLib::parse_str($c, $cookies);
            }
        }
        $buffer = http_parse_message($buffer)->body;
    } elseif (extension_loaded('curl')) {
        $curl = curl_init();
        curl_setopt($curl, CURLOPT_URL, $urlstr);
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 2);
        curl_setopt($curl, CURLOPT_TIMEOUT, 2);
        // NOTE(review): peer verification is disabled — acceptable only for test targets.
        curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($curl, CURLOPT_HEADER, true);
        curl_setopt($curl, CURLOPT_REFERER, $url->getAttribute('referer'));
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, false);
        curl_setopt($curl, CURLOPT_USERAGENT, 'TikiTest');

        // We deal with the cookies
        $cookies_string = '';
        foreach ($cookies as $c => $v) {
            $cookies_string .= "{$c}={$v}; path=/;";
        }
        curl_setopt($curl, CURLOPT_COOKIE, $cookies_string);

        switch ($method) {
            case 'get':
                curl_setopt($curl, CURLOPT_HTTPGET, true);
                break;
            case 'post':
                curl_setopt($curl, CURLOPT_POST, true);
                // NOTE(review): names and values are joined without URL
                // encoding — confirm the recorded values are already encoded.
                $post_string = '';
                foreach ($post as $p => $v) {
                    if ($post_string != '') {
                        $post_string .= '&';
                    }
                    $post_string .= "{$p}={$v}";
                }
                curl_setopt($curl, CURLOPT_POSTFIELDS, $post_string);
                break;
        }

        // Close the session to avoid timeout
        session_write_close();

        $http_response = curl_exec($curl);
        if ($http_response === false) {
            // Transfer failed: behave as if an empty response was received.
            $http_response = '';
        }

        // Headers are included in the response (CURLOPT_HEADER); split them off.
        $header_size = curl_getinfo($curl, CURLINFO_HEADER_SIZE);
        $header = substr($http_response, 0, $header_size);
        $buffer = substr($http_response, $header_size);

        preg_match_all('|Set-Cookie: (.*);|U', $header, $cookies_array);
        $cookies_tmp = '';
        foreach ($cookies_array[1] as $c) {
            $cookies_tmp .= "&{$c}";
        }

        $cookies_titi = array();
        TikiLib::parse_str($cookies_tmp, $cookies_titi);
        if (is_array($cookies_titi)) {
            $cookies = array_merge($cookies, $cookies_titi);
        }

        curl_close($curl);
    }

    if (extension_loaded('tidy')) {
        // Both documents are parsed even when diagnostics are not requested,
        // so the diff below always runs on tidy's view of them.
        $data = tidy_parse_string($data, array(), 'utf8');
        $buffer = tidy_parse_string($buffer, array(), 'utf8');
        if ($use_tidy) {
            tidy_diagnose($data);
            $result['ref_error_count'] = tidy_error_count($data);
            $result['ref_error_msg'] = tidy_get_error_buffer($data);
            tidy_diagnose($buffer);
            $result['replay_error_count'] = tidy_error_count($buffer);
            $result['replay_error_msg'] = tidy_get_error_buffer($buffer);
        }
    } else {
        if (!$loaded) {
            require_once 'lib/htmlpurifier_tiki/HTMLPurifier.tiki.php';
            $config = getHTMLPurifierTikiConfig();
            $purifier = new HTMLPurifier($config);
            $loaded = true;
        }
        if ($purifier) {
            $data = '<html><body>' . $purifier->purify($data) . '</body></html>';
            $buffer = '<html><body>' . $purifier->purify($buffer) . '</body></html>';
        }
        $result['ref_error_msg'] = tra('The Tidy extension is not present');
        $result['replay_error_msg'] = tra('The Tidy extension is not present');
    }

    // If we have a XPath then we extract the new DOM and print it in HTML
    $has_xpath = trim($xpath) != '';
    if ($has_xpath) {
        $data = verif_url_extract_xpath($data, $xpath);
        $buffer = verif_url_extract_xpath($buffer, $xpath);
    }

    $tmp = diff2($data, $buffer, "htmldiff");
    if ($has_xpath) {
        $result['html'] = preg_replace(
            array("/<html>/", "/<\\/html>/"),
            array("<div style='overflow: auto; width:500px; text-align: center'> ", "</div>"),
            $tmp
        );
    } else {
        $result['html'] = preg_replace(
            array("/<html.*<body/U", "/<\\/body><\\/html>/U"),
            array("<div style='overflow: auto; width:500px; text-align: center' ", "</div>"),
            $tmp
        );
    }

    $result['url'] = $urlstr;
    $result['method'] = $url->getAttribute('method');
    if (strtolower($result['method']) == 'post') {
        $result['post'] = $post;
    }

    return $result;
}

/**
 * Build an HTML document containing only the nodes matching an XPath query.
 *
 * @param mixed $html markup to search; cast to string, so a tidy document works too
 * @param string $xpath XPath expression selecting the nodes to keep
 * @return string serialized document holding copies of the matching nodes
 */
function verif_url_extract_xpath($html, $xpath)
{
    $markup = (string) $html;

    // loadHTML() is an instance method; calling it statically fails on PHP 8.
    $source = new DOMDocument();
    if ($markup !== '') {
        $source->loadHTML($markup);
    }

    $finder = new DOMXPath($source);
    $matches = $finder->query($xpath);

    $subset = new DOMDocument('1.0');
    $root = $subset->appendChild($subset->createElement('html'));
    // The inner wrapper is deliberately named "html" as well: verif_url()
    // rewrites every <html> tag of the diff into a <div>.
    $wrapper = $root->appendChild($subset->createElement('html'));

    if ($matches !== false) {
        foreach ($matches as $node) {
            $wrapper->appendChild($subset->importNode($node, TRUE));
        }
    }

    return $subset->saveHTML();
}
<?php
/*
 * cleanhtml.php
 *
 * Cleans and repairs a markup document (HTML, XHTML, PHP, ASP, etc.) with
 * tidy and prints the result. The document is read from the file named on
 * the command line, or from standard input when no file is given.
 *
 * NOTE: Works only with tidy for PHP 4.3.x, for tidy in PHP 5 see cleanhtml5.php
 *
 * By: John Coggeshall <*****@*****.**>
 *
 * Usage: php cleanhtml.php [filename]
 *
 */

$inputFile = isset($_SERVER['argv'][1]) ? $_SERVER['argv'][1] : null;

if ($inputFile !== null) {
    tidy_parse_file($inputFile);
} else {
    // No file argument: take the document from standard input.
    $input = file_get_contents("php://stdin");
    tidy_parse_string($input);
}

tidy_clean_repair();

// Report diagnostics ahead of the cleaned document, if tidy produced any.
$hasDiagnostics = tidy_warning_count() || tidy_error_count();
if ($hasDiagnostics) {
    echo "\n\nThe following errors or warnings occurred:\n", tidy_get_error_buffer(), "\n";
}

echo tidy_get_output();
/**
 * Clean and repair $text with HTML Tidy.
 *
 * Unless $ignore_config is true, tidy is skipped (and true is returned) when
 * $config['HTML_Tidy'] is empty or 'off'.
 *
 * @param string $text The html content to be checked. Passed by reference and
 *                     replaced with tidy's output when the repair succeeds
 * @param bool $ignore_config Run tidy regardless of $config['HTML_Tidy']
 * @return bool false when the tidy functions are unavailable or tidy reports
 *              status 2 ($text is left unchanged); true otherwise
 */
static function tidyFix(&$text, $ignore_config = false)
{
    global $config;

    if (!$ignore_config) {
        if (empty($config['HTML_Tidy']) || $config['HTML_Tidy'] == 'off') {
            return true;
        }
    }

    if (!function_exists('tidy_parse_string')) {
        return false;
    }

    $options = array();
    // Keeps tidy from wrapping: we want as little whitespace change as possible.
    $options['wrap'] = 0;
    $options['doctype'] = 'omit'; //omit, auto, strict, transitional, user
    $options['drop-empty-paras'] = true; //drop empty paragraphs
    $options['output-xhtml'] = true; //need this so that <br> will be <br/> .. etc
    $options['show-body-only'] = true;
    $options['hide-comments'] = false;
    // 'anchor-as-name' is left at its default (true) and is not always
    // available. When true it adds an id attribute to anchors; when false it
    // removes the name attribute, so it must stay true.
    //$options['anchor-as-name'] = true;

    //
    // php4: the tidy functions operate on an implicit document
    //
    if (function_exists('tidy_setopt')) {
        $options['char-encoding'] = 'utf8';
        gp_edit::tidyOptions($options);
        tidy_parse_string($text);
        tidy_clean_repair();

        // Status 2 means tidy found errors, not just warnings
        // http://www.php.net/manual/en/function.tidy-get-status.php
        if (tidy_get_status() === 2) {
            return false;
        }

        $text = tidy_get_output();

        return true;
    }

    //
    // php5: the document is passed explicitly
    //
    $tidy = tidy_parse_string($text, $options, 'utf8');
    tidy_clean_repair($tidy);

    // Status 2 means tidy found errors, not just warnings
    // http://www.php.net/manual/en/function.tidy-get-status.php
    if (tidy_get_status($tidy) === 2) {
        return false;
    }

    $text = tidy_get_output($tidy);

    return true;
}
/**
 * Parse the markup with tidy and return its diagnostics, one entry per line.
 *
 * @param string $markup
 *
 * @return array lines of tidy's error buffer, split on "\n"
 */
public function executeTidy($markup)
{
    return explode("\n", tidy_get_error_buffer(tidy_parse_string($markup)));
}
<?php
// Parse the sample document, repair it, then print what tidy complained about.
$document = tidy_parse_file("intro2_ex1.html");
tidy_clean_repair($document);

$diagnostics = tidy_get_error_buffer($document);
echo $diagnostics;
<?php
/* Two independent tidy documents: one read from a file, one from a string */
$tidy1 = tidy_parse_file("myfile.html");
$tidy2 = tidy_parse_string("<HTML><B>Hello!</B>");

/* Repair both documents */
tidy_clean_repair($tidy1);
tidy_clean_repair($tidy2);

/* Diagnostics come from the file document ... */
$errors = tidy_get_error_buffer($tidy1);

/* ... and the repaired markup from the string document */
$output = tidy_get_output($tidy2);