Example #1
0
/**
 * Repair an HTML fragment with Tidy and return it as XHTML.
 *
 * The input is parsed as UTF-8; only the body content is returned
 * ('show-body-only'), so no html/head wrapper is added to the result.
 *
 * @param string $html Markup to repair
 * @return string Repaired markup
 */
function tidy_html($html)
{
    $options = array(
        'output-xhtml'   => true,
        'show-body-only' => true,
    );
    $document = tidy_parse_string($html, $options, 'UTF8');
    $document->cleanRepair();
    $repaired = tidy_get_output($document);
    return $repaired;
}
Example #2
0
 /**
  * Run a content item's field (or a plain string) through its filters.
  *
  * When $item is a Zoo_Content_Interface, the text is read from
  * $item->{$field} and the item's filters are fetched from the 'content'
  * service. Any other value is used as the text itself and has no filters.
  * Text without filters is escaped with htmlspecialchars(); filtered text
  * is additionally passed through Tidy when that extension is loaded.
  *
  * @param mixed  $item   Content item or plain text
  * @param string $field  Property of the content item to read
  * @param int    $length Cut the text to this many bytes before filtering (0 = no cut)
  * @return string
  */
 function filter($item, $field = "content", $length = 0)
 {
     $assigned = array();
     if (is_a($item, 'Zoo_Content_Interface')) {
         $text = $item->{$field};
         $assigned = Zoo::getService('content')->getFilters($item);
     } else {
         $text = $item;
     }
     if ($length > 0) {
         $text = substr($text, 0, $length);
     }
     // Nothing assigned: the text is only escaped.
     if (!count($assigned)) {
         return htmlspecialchars($text);
     }
     // Resolve the assigned filter ids to filter objects and apply them in order.
     $filterIds = array();
     foreach ($assigned as $assignment) {
         $filterIds[] = $assignment->filter_id;
     }
     foreach (Zoo::getService('filter')->getFilters($filterIds) as $activeFilter) {
         $text = $activeFilter->filter($text);
     }
     if (!extension_loaded('tidy')) {
         return $text;
     }
     // Normalise the filtered markup to indented, unwrapped XHTML body content.
     $tidyOptions = array('indent' => TRUE, 'show-body-only' => TRUE, 'output-xhtml' => TRUE, 'wrap' => 0);
     $document = tidy_parse_string($text, $tidyOptions, 'UTF8');
     $document->cleanRepair();
     return tidy_get_output($document);
 }
 /**
  * Clean markup with the in-process tidy class instead of spawning a
  * separate tidy process.
  *
  * @param string $text HTML to check
  * @param bool $stderr When true, return tidy's error buffer instead of the cleaned output
  * @param int &$retval Receives tidy's status (-1 when the tidy class is unavailable)
  * @return string|null Cleaned source or error buffer; null when the class is missing or tidy reports status 2
  */
 protected function cleanWrapped($text, $stderr = false, &$retval = null)
 {
     if (!class_exists('tidy')) {
         wfWarn("Unable to load internal tidy class.");
         $retval = -1;
         return null;
     }
     $parser = new \tidy();
     $parser->parseString($text, $this->config['tidyConfigFile'], 'utf8');
     if ($stderr) {
         // Caller only wants the diagnostics; no repair pass is run.
         $retval = $parser->getStatus();
         return $parser->errorBuffer;
     }
     $parser->cleanRepair();
     $retval = $parser->getStatus();
     // 2 is magic number for fatal error
     // http://www.php.net/manual/en/function.tidy-get-status.php
     if ($retval == 2) {
         return null;
     }
     $cleaned = tidy_get_output($parser);
     if (!empty($this->config['debugComment']) && $retval > 0) {
         // Append tidy's report as an HTML comment, neutralising any "-->" inside it.
         $report = str_replace('-->', '--&gt;', $parser->errorBuffer);
         $cleaned .= "<!--\nTidy reports:\n" . $report . "\n-->";
     }
     return $cleaned;
 }
Example #4
0
 /**
  * Filter the response body through Tidy.
  *
  * The body is parsed as UTF-8, cleaned, and written back into $body as
  * XHTML. $headers and $uncached are accepted but not used here.
  * NOTE(review): no value is returned, although earlier documentation
  * declared a bool return - confirm what callers expect.
  *
  * @param  array  $headers  Response headers (unused)
  * @param  string $body     Markup to tidy; replaced in place
  * @param  bool   $uncached Unused
  * @return void
  */
 public static function tidy(array &$headers, &$body, $uncached)
 {
     $options = array(
         'clean' => 1,
         'bare' => 1,
         'hide-comments' => 1,
         'doctype' => 'omit',
         'indent-spaces' => 0,
         'tab-size' => 0,
         'wrap' => 0,
         'quote-ampersand' => 0,
         'output-xhtml' => true,
         'quiet' => 1,
     );
     $document = new tidy();
     $document->parseString($body, $options, 'utf8');
     $document->cleanRepair();
     $body = tidy_get_output($document);
 }
/**
 * Repeatedly tidy a document to XML and parse the result with SimpleXML.
 *
 * Stops at the first iteration whose tidied output cannot be parsed,
 * after writing a message to stderr.
 *
 * @param string $aHtml  Markup to convert
 * @param int    $aCount Number of iterations to run
 * @return void
 */
function run_loop($aHtml, $aCount)
{
    $options = array('output-xml' => true, 'clean' => true, 'numeric-entities' => true);
    for ($pass = 0; $pass < $aCount; ++$pass) {
        $converter = new tidy();
        $converter->parseString($aHtml, $options, 'utf8');
        $converter->cleanRepair();
        $parsed = simplexml_load_string(tidy_get_output($converter));
        if ($parsed === false) {
            file_put_contents('php://stderr', 'Unable to parse file');
            return;
        }
        // Release both objects before the next pass.
        unset($parsed, $converter);
    }
}
Example #6
0
/**
 * Turn a string or array into valid, standards-compliant (x)HTML
 *
 * Uses the configuration options in tidy.conf (looked up under SETTINGS_INC),
 * which should minimally have show-body-only set to yes. Arrays are tidied
 * element by element with their keys preserved. Three back ends are tried in
 * order: the object-style tidy extension, the older tidy_setopt()-style
 * extension, and the TIDY_EXE command line program. If none is available a
 * PHP notice is raised and the wrapped input is returned untidied.
 *
 * @param mixed $text The data to be tidied up
 * @return mixed $result Tidied data
 */
function tidy($text)
{
    static $tidy_funcs;
    static $tidy_conf;
    if (!isset($tidy_conf)) {
        $tidy_conf = SETTINGS_INC . 'tidy.conf';
    }
    if (is_array($text)) {
        $tidied = array();
        foreach ($text as $key => $value) {
            $tidied[$key] = tidy($value);
        }
        return $tidied;
    }
    // determine what tidy libraries are available
    if (empty($tidy_funcs)) {
        $tidy_funcs = get_extension_funcs('tidy');
    }
    $extension_present = !empty($tidy_funcs);
    // tidy_setopt() only exists in the old (1.x) flavour of the extension
    $tidy_1_lib_available = $extension_present && in_array('tidy_setopt', $tidy_funcs);
    $tidy_2_lib_available = $extension_present && !in_array('tidy_setopt', $tidy_funcs);
    $tidy_command_line_available = TIDY_EXE ? file_exists(TIDY_EXE) : false;
    $text = '<html><body>' . protect_string_from_tidy($text) . '</body></html>';
    if ($tidy_2_lib_available) {
        $document = new tidy();
        $document->parseString($text, $tidy_conf, 'utf8');
        $document->cleanRepair();
        // the tidy object is converted to its output string by trim()
        return trim($document);
    }
    if ($tidy_1_lib_available) {
        tidy_load_config($tidy_conf);
        tidy_set_encoding('utf8');
        tidy_parse_string($text);
        tidy_clean_repair();
        return trim(tidy_get_output());
    }
    if ($tidy_command_line_available) {
        // shell-escape the text, pipe it into tidy and capture tidy's stdout;
        // tidy's stderr is discarded
        $command = sprintf('echo %s | %s -q -config %s 2> /dev/null', escapeshellarg($text), TIDY_EXE, $tidy_conf);
        return trim(shell_exec($command));
    }
    trigger_error('tidy does not appear to be available within php or at the command line - no tidying is taking place.');
    return trim($text);
}
Example #7
0
 /**
  * Format markup with Tidy, using this object as the tidy document.
  *
  * Returns the input untouched when the formatter is disabled. When
  * addTimeMark is set, the elapsed time is appended as an HTML comment.
  *
  * @param string $html Markup to format
  * @return string
  */
 public function process($html)
 {
     if (!$this->enabled) {
         return $html;
     }
     if ($this->addTimeMark) {
         // First call starts the named timer; the second call below reads it.
         Debugger::timer('tidy');
     }
     $this->parseString($html, $this->config, 'utf8');
     if ($this->runCleanRepair) {
         $this->cleanRepair();
     }
     $formatted = tidy_get_output($this);
     if (!$this->addTimeMark) {
         return $formatted;
     }
     $milliseconds = number_format(Debugger::timer('tidy') * 1000, 2);
     return $formatted . "\n\n<!-- Tidy formatting took: " . $milliseconds . " ms -->";
 }
 /**
  * Reads from the wrapped input and returns the Tidy-filtered result.
  *
  * The filter is initialized lazily on the first call.
  *
  * @param int|null $len Passed through to the underlying reader
  *
  * @throws BuildException when the tidy extension is not available
  * @return string|int the tidied chunk, or -1 if the end of the underlying stream has been reached
  */
 public function read($len = null)
 {
     if (!class_exists('Tidy')) {
         throw new BuildException("You must enable the 'tidy' extension in your PHP configuration in order to use the Tidy filter.");
     }
     if (!$this->getInitialized()) {
         $this->_initialize();
         $this->setInitialized(true);
     }
     $chunk = $this->in->read($len);
     if ($chunk === -1) {
         return -1;
     }
     $options = $this->getDistilledConfig();
     $document = new Tidy();
     $document->parseString($chunk, $options, $this->encoding);
     $document->cleanRepair();
     return tidy_get_output($document);
 }
Example #9
0
File: HtmlTidy.php Project: ksst/kf
 /**
  * Post-filter that runs the response content through HTML Tidy.
  *
  * Does nothing unless htmltidy is enabled in the configuration and the
  * tidy extension is loaded. Otherwise the response content is replaced
  * with its tidied XHTML (transitional doctype) version.
  *
  * @param HttpRequestInterface  $request  Unused
  * @param HttpResponseInterface $response Response whose content is tidied in place
  * @return void
  */
 public function executeFilter(HttpRequestInterface $request, HttpResponseInterface $response)
 {
     // htmltidy must be enabled in configuration and the extension must be
     // available; the former check was inverted and bypassed the filter
     // exactly when it could run (and hit "new tidy()" when it could not).
     if ($this->config['htmltidy']['enabled'] !== 1 || !extension_loaded('tidy')) {
         // bypass
         return;
     }
     // get output from response
     $content = $response->getContent();
     $tidyoptions = [
         'clean' => true,
         'doctype' => 'transitional',
         'output-xhtml' => true,
         'drop-proprietary-attributes' => true,
         'lower-literals' => true,
         'show-body-only' => false,
         'indent-spaces' => 4,
         'wrap' => 130,
         'indent' => 'auto',
     ];
     // tidy the output
     $tidy = new tidy();
     $tidy->parseString($content, $tidyoptions, 'utf8');
     $tidy->cleanRepair();
     // @todo diagnose? errorreport?
     // set output to response
     $response->setContent(tidy_get_output($tidy), true);
 }
Example #10
0
 /**
  * Tidy the data.
  *
  * Returns the data unchanged when the tidy extension is not available.
  * Uses the tidy 1.0 procedural API when tidy_setopt() exists and the
  * parameters are an array; otherwise uses the resource/object based API.
  *
  * @access	public
  * @param	string		data
  * @return	string		tidied data
  */
 function apply($data)
 {
     if (!function_exists('tidy_parse_string')) {
         return $data;
     }
     $useLegacyApi = function_exists('tidy_setopt') && is_array($this->_params);
     if (!$useLegacyApi) {
         $document = tidy_parse_string($data, $this->_params);
         tidy_clean_repair($document);
         return tidy_get_output($document);
     }
     /**
      * tidy 1.0: options are set globally before parsing
      */
     foreach ($this->_params as $name => $setting) {
         tidy_setopt($name, $setting);
     }
     tidy_parse_string($data);
     tidy_clean_repair();
     return tidy_get_output();
 }
Example #11
0
/**
 * Convert a small set of BBCode tags in a message to HTML.
 *
 * Handles [b], [i], [u], [center], [left], [right], [ol], [ul], [li], [br],
 * [img]...[/img] and [url]http(s)://...[/url]. When the tidy extension is
 * available the result is additionally repaired to XHTML body content.
 *
 * The [img] and [url] tags are matched case-insensitively with preg_replace();
 * the previously used eregi_replace() no longer exists as of PHP 7. The tidy
 * step uses the tidy 1.0 global-option API only where tidy_setopt() exists,
 * and the document-based API otherwise.
 *
 * NOTE(review): the message is not HTML-escaped here and tag contents are
 * copied verbatim into attributes; callers must ensure the input is trusted
 * or escaped beforehand.
 *
 * @param string $message Text containing BBCode
 * @param bool   $nowrap  When false, tidy wraps lines at 80 columns; when true
 *                        no 'wrap' option is passed and tidy's default applies
 * @return string HTML
 */
function return_parsed_bbcode($message, $nowrap = false)
{
    // never strip_tags here, see Page.Talks for details
    $simple_tags = array(
        "[b]" => "<b>",
        "[/b]" => "</b>",
        "[i]" => "<i>",
        "[/i]" => "</i>",
        "[u]" => "<u>",
        "[/u]" => "</u>",
        "[center]" => "<div align=\"center\">",
        "[/center]" => "</div>",
        "[left]" => "<div align=\"left\">",
        "[/left]" => "</div>",
        "[right]" => "<div align=\"right\">",
        "[/right]" => "</div>",
        "[ol]" => "<ol>",
        "[ul]" => "<ul>",
        "[li]" => "<li>",
        "[/ol]" => "</ol>",
        "[/ul]" => "</ul>",
        "[br]" => "<br>",
    );
    $message = str_replace(array_keys($simple_tags), array_values($simple_tags), $message);
    $message = preg_replace("#\\[img\\]([^\\[]*)\\[/img\\]#i", "<img src=\"\\1\" border=\"0\">", $message);
    $message = preg_replace("#\\[url\\](https?://[^\\[]*)\\[/url\\]#i", "<a href=\"\\1\">\\1</a>", $message);
    if (function_exists("tidy_get_output")) {
        $config = array('indent' => FALSE, 'output-xhtml' => TRUE, 'show-body-only' => TRUE);
        if (!$nowrap) {
            $config['wrap'] = 80;
        }
        if (function_exists('tidy_setopt')) {
            // tidy 1.0: options and encoding are global state
            tidy_set_encoding('UTF8');
            foreach ($config as $key => $value) {
                tidy_setopt($key, $value);
            }
            tidy_parse_string($message);
            tidy_clean_repair();
            $message = tidy_get_output();
        } else {
            // tidy 2.0: options and encoding are passed with the document
            $tidy = tidy_parse_string($message, $config, 'UTF8');
            if ($tidy !== false) {
                tidy_clean_repair($tidy);
                $message = tidy_get_output($tidy);
            }
        }
    }
    return $message;
}
Example #12
0
 /**
  * Clean $this->html with Tidy, using whichever API is available.
  *
  * Prefers the tidy class; falls back to the procedural tidy functions, and
  * prints a notice when neither exists. With the class API, $this->html is
  * set to the tidy object itself rather than to a string.
  */
 function TidyClean()
 {
     if (class_exists('tidy')) {
         //PHP 5 only !!!
         $document = new tidy();
         $document->parseString($this->html, $this->TidyConfig, $this->Encoding);
         $document->cleanRepair();
         $this->html = $document;
         return;
     }
     if (!function_exists('tidy_parse_string')) {
         print "<b>No tidy support. Please enable it in your php.ini.\r\nOnly basic cleaning is beeing applied\r\n</b>";
         return;
     }
     //use procedural style for compatibility with PHP 4.3
     tidy_set_encoding($this->Encoding);
     foreach ($this->TidyConfig as $option => $setting) {
         tidy_setopt($option, $setting);
     }
     tidy_parse_string($this->html);
     tidy_clean_repair();
     $this->html = tidy_get_output();
 }
Example #13
0
 /**
  * Tidyfication of the strings
  *
  * Returns the input unchanged (after logging an error) when the tidy class
  * is missing, and an empty string for blank input. Otherwise the trimmed
  * string is parsed and repaired with $this->config, warnings are reported
  * through reportWarning(), and the output is optionally wrapped in
  * "Tidy - Begin/End" comments when the 'showTidyElement' option is 'enabled'.
  *
  * The debug accumulator started at the top is stopped on every return path.
  *
  * @param string $str
  * @return string
  */
 public function tidyCleaner($str)
 {
     eZDebug::accumulatorStart('eztidytemplateoperator', 'Tidy', 'Tidy template operator');
     if (!class_exists('tidy')) {
         eZDebug::writeError("phpTidy isn't installed", 'eZTidy::tidyCleaner()');
         eZDebug::accumulatorStop('eztidytemplateoperator');
         return $str;
     }
     $str = trim($str);
     if ($str == "") {
         eZDebug::accumulatorStop('eztidytemplateoperator');
         return "";
     }
     $this->tidy = new tidy();
     $this->tidy->parseString($str, $this->config, $this->options['charset']);
     $this->tidy->cleanRepair();
     $this->isTidyfied = true;
     $this->reportWarning();
     $output = tidy_get_output($this->tidy);
     if (strtolower($this->options['showTidyElement']) == 'enabled') {
         $output = "<!-- Tidy - Begin -->\n" . $output . "\n<!-- Tidy - End -->";
     }
     eZDebug::accumulatorStop('eztidytemplateoperator');
     return $output;
 }
Example #14
0
 /**
  * Delivers a PDF file from XHTML
  *
  * Strips all href attributes from the content, embeds it in the print body
  * template together with two stylesheet locations, converts the page to
  * XML (via Tidy when available), and passes the result through
  * processPrintoutput2FO() to deliverPDFfromFO().
  *
  * @param string $content The XHTML string
  * @param string|null $title Passed through to deliverPDFfromFO()
  * @access public
  */
 public function deliverPDFfromHTML($content, $title = NULL)
 {
     $content = preg_replace("/href=\".*?\"/", "", $content);
     $template = new ilTemplate("tpl.il_as_tst_print_body.html", TRUE, TRUE, "Modules/Test");
     $template->setVariable("TITLE", ilUtil::prepareFormOutput($this->getTitle()));
     $template->setVariable("ADM_CONTENT", $content);
     // First stylesheet: the test style.
     $template->setCurrentBlock("css_file");
     $template->setVariable("CSS_FILE", $this->getTestStyleLocation("filesystem"));
     $template->parseCurrentBlock();
     // Second stylesheet: delos.css.
     $template->setCurrentBlock("css_file");
     $template->setVariable("CSS_FILE", ilUtil::getStyleSheetLocation("filesystem", "delos.css"));
     $template->parseCurrentBlock();
     // Make relative links absolute and drop the "dontprint" section.
     $html = str_replace("href=\"./", "href=\"" . ILIAS_HTTP_PATH . "/", $template->get());
     $html = preg_replace("/<div id=\"dontprint\">.*?<\\/div>/ims", "", $html);
     if (extension_loaded("tidy")) {
         $tidyOptions = array("indent" => false, "output-xml" => true, "numeric-entities" => true);
         $document = new tidy();
         $document->parseString($html, $tidyOptions, 'utf8');
         $document->cleanRepair();
         // Remove whatever precedes "<html" on the first line of tidy's output.
         $html = preg_replace("/^.*?(<html)/", "\\1", tidy_get_output($document));
     } else {
         // Without tidy, only replace the two named entities by hand.
         $html = str_replace(array("&nbsp;", "&otimes;"), array("&#160;", "X"), $html);
     }
     $html = preg_replace("/src=\".\\//ims", "src=\"" . ILIAS_HTTP_PATH . "/", $html);
     $this->deliverPDFfromFO($this->processPrintoutput2FO($html), $title);
 }
         $node->appendChild($dom->createTextNode('.1'));
         //hack to trigger an update for 4.1.10 release due translations not being loaded on updates
     }
 }
 // Overwrite the 'locale' attribute of every element that carries one with
 // the locale currently being processed.
 $results = $xpath->query('//*[@locale]');
 for ($i = 0; $i < $results->length; $i++) {
     $results->item($i)->setAttribute('locale', $locale);
 }
 $out = $dom->saveXML();
 // Re-indent the serialized XML through Tidy when the extension is present.
 if (function_exists('tidy_get_output')) {
     $tidy = new tidy();
     $tidy_config = array('input-xml' => true, 'output-xml' => true, 'indent' => true, 'wrap' => 0);
     // NOTE(review): the return value is discarded and nothing has been
     // parsed yet, so this call appears to have no effect - confirm.
     $tidy->isXML();
     $tidy->parseString($out, $tidy_config, 'UTF8');
     $tidy->cleanRepair();
     $out = tidy_get_output($tidy);
 }
 // Choose the output directory: inside the archive tree (mirroring the
 // module's path relative to the current working directory) or next to the
 // module's configuration, in both cases under a per-locale sub-directory.
 if ($booleans['create_archive']) {
     $cwd = realpath(getcwd());
     $c_path = realpath($configs[$module]);
     // The module path must lie below the working directory for the
     // relative part to be derivable.
     if (strpos($c_path, $cwd) !== 0) {
         I2CE::raiseError("Cannot determine module sub-directory structure for {$module}", E_USER_ERROR);
     }
     $target_dir = $archive_dir . DIRECTORY_SEPARATOR . 'files' . DIRECTORY_SEPARATOR . $locale . DIRECTORY_SEPARATOR . substr($c_path, strlen($cwd)) . DIRECTORY_SEPARATOR . $locale . DIRECTORY_SEPARATOR;
 } else {
     $target_dir = $configs[$module] . DIRECTORY_SEPARATOR . $locale . DIRECTORY_SEPARATOR;
 }
 if (!is_dir($target_dir)) {
     if (!mkdir($target_dir, 0775, true)) {
         I2CE::raiseError("Could not created {$target_dir}", E_USER_ERROR);
     }
Example #16
0
/**
 * Convert arbitrary HTML "tag soup" into XHTML that an XML parser accepts.
 *
 * @param string $htmlTagSoup Markup to convert
 * @return string XHTML with every '&nbsp;' replaced by '&#160;'
 */
function tidyToXml($htmlTagSoup)
{
    // Parse with XHTML output switched on, then run the repair pass.
    $converter = new Tidy();
    $converter->parseString($htmlTagSoup, array('output-xhtml' => true));
    $converter->cleanRepair();
    // Tidy treats '&nbsp;' as predeclared in XHTML and leaves it alone,
    // whereas the XML parser used on the result does not know it. The
    // numeric form needs no declaration, so substitute it.
    return str_replace('&nbsp;', '&#160;', tidy_get_output($converter));
}
Example #17
0
    fwrite($pipes[0], $source);
    fclose($pipes[0]);
    // Read clean source out to the browser
    while (!feof($pipes[1])) {
        //echo fgets($pipes[1], 1024);
        $newsrc .= fgets($pipes[1], 1024);
    }
    fclose($pipes[1]);
    // Clean up after ourselves
    proc_close($process);
} else {
    /* Use tidy if it's available from PECL */
    if (function_exists('tidy_parse_string')) {
        $tempsrc = tidy_parse_string($source);
        tidy_clean_repair();
        $newsrc = tidy_get_output();
    } else {
        // Better give them back what they came with, so they don't lose it all...
        $newsrc = "<body>\n" . $source . "\n</body>";
    }
}
// Split our source into an array by lines, dropping empty lines
$srcLines = preg_split("/\n/", $newsrc, -1, PREG_SPLIT_NO_EMPTY);
// Get only the lines between the body tags.
// Because of the post-increment, $startLn ends up one past the line that
// contains '<body' (or at the end of the array if no such line exists).
$startLn = 0;
while (strpos($srcLines[$startLn++], '<body') === false && $startLn < sizeof($srcLines)) {
}
// Same scan for the closing tag, starting right after the opening line;
// $endLn ends up one past the line that contains '</body'.
$endLn = $startLn;
while (strpos($srcLines[$endLn++], '</body') === false && $endLn < sizeof($srcLines)) {
}
// Keep the lines after the '<body' line up to, but excluding, the '</body' line.
$srcLines = array_slice($srcLines, $startLn, $endLn - $startLn - 1);
/**
 * Clean HTML with Tidy when the extension is available.
 *
 * Attributes are upper-cased and lines are wrapped at 800 columns. The API is
 * chosen by capability rather than by PHP major version: the tidy 1.0
 * global-option functions are used where tidy_setopt() exists, the tidy
 * class everywhere else. Matching on the version digit left the result
 * undefined on any PHP release other than 4.x or 5.x.
 *
 * @param string $input_string Markup to clean
 * @return string Cleaned markup, or the input unchanged when Tidy is not installed
 */
function tidy_html($input_string)
{
    // Detect if Tidy is configured
    if (!function_exists('tidy_get_release')) {
        # Tidy not configured for this computer
        return $input_string;
    }
    if (function_exists('tidy_setopt')) {
        # Tidy 1.0 (PHP version 4): options are global state
        tidy_setopt('uppercase-attributes', TRUE);
        tidy_setopt('wrap', 800);
        tidy_parse_string($input_string);
        return tidy_get_output();
    }
    # Tidy 2.0 (PHP version 5 and later): options are passed with the document
    $config = array('uppercase-attributes' => true, 'wrap' => 800);
    $tidy = new tidy();
    $tidy->parseString($input_string, $config, 'utf8');
    $tidy->cleanRepair();
    return tidy_get_output($tidy);
}
 /**
  * Convert a print output to XSL-FO
  *
  * The markup is first made XML-safe (through Tidy when the extension is
  * loaded, otherwise by replacing two named entities), then transformed with
  * the question2fo.xsl stylesheet. The transformation result is also written
  * to the log.
  *
  * @param string $print_output The print output
  * @return string XSL-FO code
  * @access public
  */
 function processPrintoutput2FO($print_output)
 {
     global $ilLog;
     if (extension_loaded("tidy")) {
         $tidyOptions = array("indent" => false, "output-xml" => true, "numeric-entities" => true);
         $document = new tidy();
         $document->parseString($print_output, $tidyOptions, 'utf8');
         $document->cleanRepair();
         // Remove whatever precedes "<html" on the first line of tidy's output.
         $print_output = preg_replace("/^.*?(<html)/", "\\1", tidy_get_output($document));
     } else {
         $print_output = str_replace(array("&nbsp;", "&otimes;"), array("&#160;", "X"), $print_output);
     }
     $stylesheet = file_get_contents("./Modules/Survey/xml/question2fo.xsl");
     // additional font support: substitute the configured PDF font family
     $fontFamily = $GLOBALS['ilSetting']->get('rpc_pdf_font', 'Helvetica, unifont');
     $stylesheet = str_replace('font-family="Helvetica, unifont"', 'font-family="' . $fontFamily . '"', $stylesheet);
     // Both documents are handed to the processor as in-memory arguments.
     $arguments = array('/_xml' => $print_output, '/_xsl' => $stylesheet);
     $processor = xslt_create();
     $parameters = array();
     $fo = xslt_process($processor, "arg:/_xml", "arg:/_xsl", NULL, $arguments, $parameters);
     xslt_error($processor);
     xslt_free($processor);
     $ilLog->write($fo);
     return $fo;
 }
 /**
  * Clean markup with the in-process tidy extension, avoiding the cost of
  * spawning an external tidy process.
  *
  * 'pear install tidy' should be able to compile the extension module.
  *
  * Returns the cleaned source, or null when tidy reports status 2.
  *
  * @private
  * @static
  */
 function internalTidy($text)
 {
     global $wgTidyConf, $IP;
     $profileSection = 'Parser::internalTidy';
     wfProfileIn($profileSection);
     $document = new tidy();
     $document->parseString($text, $wgTidyConf, 'utf8');
     $document->cleanRepair();
     // 2 is magic number for fatal error
     // http://www.php.net/manual/en/function.tidy-get-status.php
     $failed = $document->getStatus() == 2;
     $cleansource = $failed ? null : tidy_get_output($document);
     wfProfileOut($profileSection);
     return $cleansource;
 }
Example #21
0
 /**
  * Use the HTML tidy PECL extension to use the tidy library in-process,
  * saving the overhead of spawning a new process.
  *
  * 'pear install tidy' should be able to compile the extension module.
  *
  * Returns the cleaned source, or null when tidy reports a fatal error
  * (status 2). With $wgDebugTidy set, tidy's report is appended as an HTML
  * comment to successful output only, so a fatal error still yields null
  * instead of a string that consists of nothing but the report.
  *
  * @private
  * @static
  */
 function internalTidy($text)
 {
     global $wgTidyConf, $wgDebugTidy;
     $fname = 'Parser::internalTidy';
     wfProfileIn($fname);
     $tidy = new tidy();
     $tidy->parseString($text, $wgTidyConf, 'utf8');
     $tidy->cleanRepair();
     $status = $tidy->getStatus();
     if ($status == 2) {
         // 2 is magic number for fatal error
         // http://www.php.net/manual/en/function.tidy-get-status.php
         $cleansource = null;
     } else {
         $cleansource = tidy_get_output($tidy);
         if ($wgDebugTidy && $status > 0) {
             // Neutralise "-->" so the report cannot close the comment early.
             $cleansource .= "<!--\nTidy reports:\n" . str_replace('-->', '--&gt;', $tidy->errorBuffer) . "\n-->";
         }
     }
     wfProfileOut($fname);
     return $cleansource;
 }
Example #22
0
 /**
  * Use HTML Tidy to clean up $text.
  * Unless $ignore_config is set, Tidy is skipped (and true is returned) when
  * $config['HTML_Tidy'] is empty or 'off'.
  *
  * @param string $text The html content to be checked. Passed by reference and replaced with the tidied markup on success
  * @param bool $ignore_config Run Tidy regardless of the $config['HTML_Tidy'] setting
  * @return bool False when the tidy functions are missing or Tidy reports a fatal error (status 2), true otherwise
  */
 static function tidyFix(&$text, $ignore_config = false)
 {
     global $config;
     if (!$ignore_config) {
         if (empty($config['HTML_Tidy']) || $config['HTML_Tidy'] == 'off') {
             return true;
         }
     }
     if (!function_exists('tidy_parse_string')) {
         return false;
     }
     $options = array();
     $options['wrap'] = 0;
     // keeps tidy from wrapping; the aim is to change as little whitespace as possible
     $options['doctype'] = 'omit';
     // possible values: omit, auto, strict, transitional, user
     $options['drop-empty-paras'] = true;
     // drop empty paragraphs
     $options['output-xhtml'] = true;
     // needed so that <br> becomes <br/> etc.
     $options['show-body-only'] = true;
     $options['hide-comments'] = false;
     // 'anchor-as-name' is intentionally not set: its default is true but the
     // option is not always available. When true it adds an id attribute to
     // anchors; when false it removes the name attribute.
     //
     //	php4: tidy 1.0 API, options are applied globally via gp_edit::tidyOptions()
     //
     if (function_exists('tidy_setopt')) {
         $options['char-encoding'] = 'utf8';
         gp_edit::tidyOptions($options);
         $tidy = tidy_parse_string($text);
         tidy_clean_repair();
         if (tidy_get_status() === 2) {
             // 2 is magic number for fatal error
             // http://www.php.net/manual/en/function.tidy-get-status.php
             // NOTE(review): $tidyErrors is a local that is never read or returned.
             $tidyErrors[] = 'Tidy found serious XHTML errors: <br/>' . nl2br(htmlspecialchars(tidy_get_error_buffer($tidy)));
             return false;
         }
         $text = tidy_get_output();
         //
         //	php5: options and encoding are passed with the document
         //
     } else {
         $tidy = tidy_parse_string($text, $options, 'utf8');
         tidy_clean_repair($tidy);
         if (tidy_get_status($tidy) === 2) {
             // 2 is magic number for fatal error
             // http://www.php.net/manual/en/function.tidy-get-status.php
             // NOTE(review): $tidyErrors is a local that is never read or returned.
             $tidyErrors[] = 'Tidy found serious XHTML errors: <br/>' . nl2br(htmlspecialchars(tidy_get_error_buffer($tidy)));
             return false;
         }
         $text = tidy_get_output($tidy);
     }
     return true;
 }
Example #23
0
<?php

/*
 * cleanhtml.php
 *
 * A simple script to clean and repair HTML,XHTML,PHP,ASP,etc. documents
 * if no file is provided, it reads from standard input.
 *
 * NOTE: Works only with tidy for PHP 4.3.x, for tidy in PHP 5 see cleanhtml5.php
 *
 * By: John Coggeshall <*****@*****.**>
 *
 * Usage: php cleanhtml.php [filename]
 *
 */
// Take the document from the file named on the command line, or from
// standard input when no file name was given.
if (isset($_SERVER['argv'][1])) {
    tidy_parse_file($_SERVER['argv'][1]);
} else {
    $data = file_get_contents("php://stdin");
    tidy_parse_string($data);
}
tidy_clean_repair();
// Report diagnostics first, then print the repaired document.
if (tidy_warning_count() || tidy_error_count()) {
    echo "\n\nThe following errors or warnings occurred:\n", tidy_get_error_buffer(), "\n";
}
echo tidy_get_output();
Example #24
0
 /**
  * Clean markup with the in-process tidy extension instead of spawning a
  * separate tidy process.
  *
  * @param string $text HTML to check
  * @param bool $stderr When true, return tidy's error buffer instead of the cleaned output
  * @param int &$retval Receives tidy's status (-1 when tidy is unavailable)
  * @return string|null Cleaned source or error buffer; null when tidy is missing or reports status 2
  */
 private static function phpClean($text, $stderr = false, &$retval = null)
 {
     global $wgTidyConf, $wgDebugTidy;
     // Under HHVM the presence of tidy_repair_string() is checked, elsewhere
     // the presence of the tidy class.
     $tidyMissing = wfIsHHVM() ? !function_exists('tidy_repair_string') : !class_exists('tidy');
     if ($tidyMissing) {
         wfWarn("Unable to load internal tidy class.");
         $retval = -1;
         return null;
     }
     $parser = new tidy();
     $parser->parseString($text, $wgTidyConf, 'utf8');
     if ($stderr) {
         // Caller only wants the diagnostics; no repair pass is run.
         $retval = $parser->getStatus();
         return $parser->errorBuffer;
     }
     $parser->cleanRepair();
     $retval = $parser->getStatus();
     // 2 is magic number for fatal error
     // http://www.php.net/manual/en/function.tidy-get-status.php
     if ($retval == 2) {
         return null;
     }
     $cleaned = tidy_get_output($parser);
     if ($wgDebugTidy && $retval > 0) {
         // Append tidy's report as an HTML comment, neutralising any "-->" inside it.
         $report = str_replace('-->', '--&gt;', $parser->errorBuffer);
         $cleaned .= "<!--\nTidy reports:\n" . $report . "\n-->";
     }
     return $cleaned;
 }
Example #25
0
        }
        if (check_for_add_field($line)) {
            continue;
        }
        if (check_for_set_map($line)) {
            continue;
        }
    }
}
// Serialize $topNode through $config->saveXML()
// (presumably $config is the DOM document built above - confirm).
$text = $config->saveXML($topNode);
$tidy = new tidy();
// From here on $config holds the Tidy options instead: XML in, XML out,
// indented, no line wrapping.
$config = array('input-xml' => true, 'output-xml' => true, 'indent' => true, 'wrap' => 0);
// NOTE(review): the return value is discarded and nothing has been parsed
// yet, so this call appears to have no effect - confirm.
$tidy->isXML();
$tidy->parseString($text, $config, 'UTF8');
$tidy->cleanRepair();
// Write the re-indented XML, followed by a newline, to config_formClass.xml.
file_put_contents("config_formClass.xml", tidy_get_output($tidy) . "\n");
function check_for_add_field($line)
{
    global $config;
    global $red;
    global $black;
    global $classNode;
    global $fieldsNode;
    if (!preg_match('/^\\s*\\$this->addField\\(\\s*(.*)\\s*\\);\\s*$/', $line, $matches)) {
        if (preg_match('/addField/', $line)) {
            echo "{$red}Found addField() but don't know how to deal with it at:{$black}\n\t{$line}\n";
        }
        return false;
    }
    $fields = explode(',', $matches[1]);
    //we hope that there are no commas in the names
Example #26
0
 /**
  * Repair a markup fragment with Tidy and return it as XHTML body content.
  *
  * @param string $node Markup to repair
  * @return string Repaired markup, or the literal "Error - cannot repair" when the tidy class is missing
  */
 private function repairHtml($node)
 {
     if (!class_exists('tidy')) {
         return "Error - cannot repair";
     }
     $options = array('clean' => true, 'output-xhtml' => true, 'show-body-only' => true, 'input-xml' => true);
     $parser = new tidy();
     $parser->parseString($node, $options, 'utf8');
     $parser->cleanRepair();
     $repaired = tidy_get_output($parser);
     // Runs tidy's diagnostics; the resulting error buffer is not used here.
     $parser->diagnose();
     return $repaired;
 }
 /**
  * Re-indent markup with Tidy.
  *
  * Returns the input unchanged when the tidy class is not available. In the
  * tidied output, a closing tag that sits alone on the line right after an
  * opening tag is pulled back up onto that line.
  *
  * @param string $html Markup to indent
  * @return string
  */
 public function htmlIndentation($html)
 {
     if (!class_exists('tidy')) {
         return $html;
     }
     $options = array(
         'char-encoding' => 'utf8',
         'vertical-space' => false,
         'indent' => true,
         'wrap' => 0,
         'word-2000' => 1,
         'break-before-br' => true,
         'indent-cdata' => true,
     );
     $parser = new \Tidy();
     $parser->parseString($html, $options);
     $indented = tidy_get_output($parser);
     return str_replace('>' . PHP_EOL . '</', '></', $indented);
 }
Example #28
0
 /**
  * Use the HTML tidy PECL extension to use the tidy library in-process,
  * saving the overhead of spawning a new process.
  *
  * 'pear install tidy' should be able to compile the extension module.
  *
  * The profiling section opened at the top is closed on every return path,
  * including the $stderr one. With $wgDebugTidy set, tidy's report is
  * appended as an HTML comment to successful output only, so a fatal error
  * (status 2) still yields null.
  *
  * @param string $text HTML to check
  * @param bool $stderr When true, return tidy's error buffer instead of the cleaned output
  * @param int &$retval Receives tidy's status
  * @return string|null Cleaned source or error buffer; null when tidy reports status 2
  */
 private static function execInternalTidy($text, $stderr = false, &$retval = null)
 {
     global $wgTidyConf, $wgDebugTidy;
     wfProfileIn(__METHOD__);
     $tidy = new tidy();
     $tidy->parseString($text, $wgTidyConf, 'utf8');
     if ($stderr) {
         $retval = $tidy->getStatus();
         $errors = $tidy->errorBuffer;
         wfProfileOut(__METHOD__);
         return $errors;
     }
     $tidy->cleanRepair();
     $retval = $tidy->getStatus();
     if ($retval == 2) {
         // 2 is magic number for fatal error
         // http://www.php.net/manual/en/function.tidy-get-status.php
         $cleansource = null;
     } else {
         $cleansource = tidy_get_output($tidy);
         if ($wgDebugTidy && $retval > 0) {
             // Neutralise "-->" so the report cannot close the comment early.
             $cleansource .= "<!--\nTidy reports:\n" . str_replace('-->', '--&gt;', $tidy->errorBuffer) . "\n-->";
         }
     }
     wfProfileOut(__METHOD__);
     return $cleansource;
 }
Example #29
0
        $datetext = str_replace('juliol', 'July', $datetext);
        $datetext = str_replace('agost', 'August', $datetext);
        $datetext = str_replace('setembre', 'September', $datetext);
        $datetext = str_replace('octubre', 'October', $datetext);
        $datetext = str_replace('novembre', 'November', $datetext);
        $datetext = str_replace('desembre', 'December', $datetext);
        $date = date_create_from_format('F d, Y H:i:s', $datetext . ' 00:00:00');
        $newItem->setDate($date->format('Y-m-d H:i:s'));
        //Now add the feed item
        $TestFeed->addItem($newItem);
        $feed_count++;
    }
    $texts = $html->find('text');
    $go_on = FALSE;
    foreach ($texts as $text) {
        if ($text->plaintext == '&laquo; Older Entries') {
            //Not sleeping, Wordpress.com does not appear to be rate-limited
            $html_text = file_get_contents($text->parent->href) or exit(1);
            $tidy = tidy_parse_string($html_text, $tidy_config, 'UTF8');
            tidy_clean_repair($tidy);
            $html = str_get_html(tidy_get_output($tidy));
            $go_on = TRUE;
            break;
        }
    }
}
// Exit with status 1 when not a single feed item was added.
if ($feed_count == 0) {
    //No error but no feeds, this is wrong
    exit(1);
}
// Otherwise let the feed object generate the feed.
$TestFeed->generateFeed();
Example #30
0
<?php

// Load the sample document, run the repair pass, and print the result.
$document = tidy_parse_file("intro2_ex1.html");
tidy_clean_repair($document);
print tidy_get_output($document);