/**
 * Repair an HTML fragment with Tidy and return it as XHTML.
 *
 * Tidy is configured with 'show-body-only', so only the body markup is
 * returned rather than a complete document.
 *
 * @param string $html Markup to repair; parsed as UTF-8.
 * @return string The repaired markup as produced by tidy_get_output().
 */
function tidy_html($html)
{
    $options = array(
        'output-xhtml'   => true,
        'show-body-only' => true,
    );

    $document = tidy_parse_string($html, $options, 'UTF8');
    $document->cleanRepair();

    return tidy_get_output($document);
}
/**
 * Filter a content item's content.
 *
 * When $item is a Zoo_Content_Interface, the text is read from the property
 * named by $field and the filters assigned to the item are applied to it; the
 * filtered result is then passed through Tidy if the extension is loaded.
 * Anything else is treated as the text itself. Text with no assigned filters
 * is escaped with htmlspecialchars().
 *
 * @param mixed  $item   Content item, or the raw text to filter.
 * @param string $field  Property of the content item holding the text.
 * @param int    $length When greater than 0, the text is cut to this many
 *                       bytes before any filtering takes place.
 * @return string
 */
function filter($item, $field = "content", $length = 0)
{
    $assigned = array();

    if (!is_a($item, 'Zoo_Content_Interface')) {
        $text = $item;
    } else {
        $text = $item->{$field};
        $assigned = Zoo::getService('content')->getFilters($item);
    }

    if ($length > 0) {
        $text = substr($text, 0, $length);
    }

    // Without any assigned filter the text is only escaped.
    if (!count($assigned)) {
        return htmlspecialchars($text);
    }

    $filterIds = array();
    foreach ($assigned as $assignment) {
        $filterIds[] = $assignment->filter_id;
    }

    foreach (Zoo::getService('filter')->getFilters($filterIds) as $filter) {
        $text = $filter->filter($text);
    }

    if (extension_loaded('tidy')) {
        $options = array(
            'indent'         => true,
            'show-body-only' => true,
            'output-xhtml'   => true,
            'wrap'           => 0,
        );
        $document = tidy_parse_string($text, $options, 'UTF8');
        $document->cleanRepair();
        $text = tidy_get_output($document);
    }

    return $text;
}
/**
 * Use the HTML tidy extension to use the tidy library in-process,
 * saving the overhead of spawning a new process.
 *
 * @param string $text HTML to check
 * @param bool $stderr Whether to read result from error status instead of output
 * @param int &$retval Exit code (-1 on internal error)
 * @return string|null The tidy error buffer when $stderr is set; otherwise the
 *   repaired markup, or null when the tidy class is missing or tidy reports a
 *   fatal error.
 */
protected function cleanWrapped($text, $stderr = false, &$retval = null)
{
    if (!class_exists('tidy')) {
        wfWarn("Unable to load internal tidy class.");
        $retval = -1;

        return null;
    }

    $tidy = new \tidy();
    $tidy->parseString($text, $this->config['tidyConfigFile'], 'utf8');

    if ($stderr) {
        $retval = $tidy->getStatus();

        return $tidy->errorBuffer;
    }

    $tidy->cleanRepair();
    $retval = $tidy->getStatus();
    if ($retval == 2) {
        // 2 is magic number for fatal error
        // http://www.php.net/manual/en/function.tidy-get-status.php
        return null;
    }

    $cleansource = tidy_get_output($tidy);
    if (!empty($this->config['debugComment']) && $retval > 0) {
        // Escape comment terminators in the report so that it cannot close
        // the surrounding HTML comment early.
        $cleansource .= "<!--\nTidy reports:\n" .
            str_replace('-->', '--&gt;', $tidy->errorBuffer) .
            "\n-->";
    }

    return $cleansource;
}
/**
 * Filter through Tidy
 *
 * Rewrites $body in place with the cleaned-up XHTML produced by Tidy.
 * Neither $headers nor $uncached is read by this filter.
 *
 * @param array  $headers  Response headers (not used here)
 * @param string $body     Markup to clean; replaced with Tidy's output
 * @param bool   $uncached Not used here
 * @return void No value is returned; the result is delivered through $body
 */
public static function tidy(array &$headers, &$body, $uncached)
{
    $options = array(
        'clean'           => 1,
        'bare'            => 1,
        'hide-comments'   => 1,
        'doctype'         => 'omit',
        'indent-spaces'   => 0,
        'tab-size'        => 0,
        'wrap'            => 0,
        'quote-ampersand' => 0,
        'output-xhtml'    => true,
        'quiet'           => 1,
    );

    $cleaner = new tidy();
    $cleaner->parseString($body, $options, 'utf8');
    $cleaner->cleanRepair();

    $body = tidy_get_output($cleaner);
}
/**
 * Repeatedly convert an HTML string to XML with Tidy and parse it with SimpleXML.
 *
 * Every pass creates a fresh tidy object, repairs the markup and loads the
 * output with simplexml_load_string(). The loop stops at the first pass whose
 * output cannot be parsed, after writing a message to stderr.
 *
 * @param string $aHtml  Markup to convert.
 * @param int    $aCount Number of passes to run.
 * @return void
 */
function run_loop($aHtml, $aCount)
{
    $options = array(
        'output-xml'       => true,
        'clean'            => true,
        'numeric-entities' => true,
    );

    for ($pass = 0; $pass < $aCount; $pass++) {
        $converter = new tidy();
        $converter->parseString($aHtml, $options, 'utf8');
        $converter->cleanRepair();

        $parsed = simplexml_load_string(tidy_get_output($converter));
        if ($parsed === false) {
            file_put_contents('php://stderr', 'Unable to parse file');

            return;
        }

        // Release both objects before the next pass.
        unset($parsed, $converter);
    }
}
/**
 * Turn a string or array into valid, standards-compliant (x)HTML
 *
 * Uses configuration options in tidy.conf - which should minimally have show-body-only set to yes
 *
 * Arrays are processed element by element (recursively) with their keys
 * preserved. Strings are tidied with, in order of preference: the tidy 2.x
 * object API, the tidy 1.x procedural API, or the tidy command line program
 * named by TIDY_EXE. If none of these is available a PHP notice is raised and
 * the wrapped, untidied text is returned.
 *
 * @param mixed $text The data to be tidied up
 * @return mixed $result Tidied data
 */
function tidy($text)
{
    static $tidy_funcs;
    static $tidy_conf;

    if (!isset($tidy_conf)) {
        $tidy_conf = SETTINGS_INC . 'tidy.conf';
    }

    if (is_array($text)) {
        $result = array();
        foreach (array_keys($text) as $key) {
            $result[$key] = tidy($text[$key]);
        }

        return $result;
    }

    // determine what tidy libraries are available
    if (empty($tidy_funcs)) {
        $tidy_funcs = get_extension_funcs('tidy');
    }
    // tidy_setopt() is used here as the marker for the tidy 1.x API: when the
    // extension is loaded without it, the tidy 2.x API is assumed.
    $tidy_lib_available = !empty($tidy_funcs);
    $has_setopt = $tidy_lib_available && in_array('tidy_setopt', $tidy_funcs, true);
    $tidy_1_lib_available = $has_setopt;
    $tidy_2_lib_available = $tidy_lib_available && !$has_setopt;
    $tidy_command_line_available = TIDY_EXE ? file_exists(TIDY_EXE) : false;

    $text = protect_string_from_tidy($text);
    $text = '<html><body>' . $text . '</body></html>';

    if ($tidy_2_lib_available) {
        $tidy = new tidy();
        $tidy->parseString($text, $tidy_conf, 'utf8');
        $tidy->cleanRepair();
        $result = tidy_get_output($tidy);
    } elseif ($tidy_1_lib_available) {
        tidy_load_config($tidy_conf);
        tidy_set_encoding('utf8');
        tidy_parse_string($text);
        tidy_clean_repair();
        $result = tidy_get_output();
    } elseif ($tidy_command_line_available) {
        // Every dynamic part of the command line is escaped, including the
        // executable and the config file path, so that spaces or shell
        // metacharacters in those paths cannot alter the command.
        $cmd = 'echo ' . escapeshellarg($text)
            . ' | ' . escapeshellarg(TIDY_EXE)
            . ' -q -config ' . escapeshellarg($tidy_conf)
            . ' 2> /dev/null';
        // pipes the input to tidy and captures what tidy writes to stdout
        $result = shell_exec($cmd);
    } else {
        trigger_error('tidy does not appear to be available within php or at the command line - no tidying is taking place.');
        $result = $text;
    }

    // shell_exec() yields null when the command fails or prints nothing;
    // normalise to a string before trimming.
    return trim((string) $result);
}
/**
 * Run the given markup through Tidy and return the result.
 *
 * The markup is returned untouched when the processor is disabled. The repair
 * step only runs when $this->runCleanRepair is set, and when
 * $this->addTimeMark is set an HTML comment with the elapsed time is appended.
 * NOTE(review): parseString()/cleanRepair() are called on $this, so this class
 * presumably extends tidy — confirm against the class declaration.
 *
 * @param string $html Markup to process.
 * @return string Processed markup, or the input when disabled.
 */
public function process($html)
{
    if (!$this->enabled) {
        return $html;
    }

    if ($this->addTimeMark) {
        // Start the named timer.
        Debugger::timer('tidy');
    }

    $this->parseString($html, $this->config, 'utf8');
    if ($this->runCleanRepair) {
        $this->cleanRepair();
    }
    $result = tidy_get_output($this);

    if ($this->addTimeMark) {
        $seconds = Debugger::timer('tidy');
        $milliseconds = number_format($seconds * 1000, 2);
        $result .= "\n\n<!-- Tidy formatting took: " . $milliseconds . " ms -->";
    }

    return $result;
}
/**
 * Reads input and returns Tidy-filtered output.
 *
 * The filter is initialised on first use. Whatever the wrapped reader returns
 * for this call is parsed, repaired and returned as a string.
 *
 * @param mixed $len Passed through unchanged to the wrapped reader's read()
 *
 * @throws BuildException When the Tidy class is not available
 * @return string|int The tidied chunk, or -1 if the end of the underlying
 *                    stream has been reached
 */
public function read($len = null)
{
    if (!class_exists('Tidy')) {
        throw new BuildException("You must enable the 'tidy' extension in your PHP configuration in order to use the Tidy filter.");
    }

    if (!$this->getInitialized()) {
        $this->_initialize();
        $this->setInitialized(true);
    }

    $chunk = $this->in->read($len);
    if ($chunk === -1) {
        return -1;
    }

    $options = $this->getDistilledConfig();

    $cleaner = new Tidy();
    $cleaner->parseString($chunk, $options, $this->encoding);
    $cleaner->cleanRepair();

    return tidy_get_output($cleaner);
}
/**
 * Runs the response content through HTML Tidy and writes the result back.
 *
 * The filter is a no-op unless htmltidy is enabled in the configuration and
 * the tidy extension is loaded.
 *
 * @param HttpRequestInterface  $request  Current request (not read here).
 * @param HttpResponseInterface $response Response whose content is tidied.
 * @return void
 */
public function executeFilter(HttpRequestInterface $request, HttpResponseInterface $response)
{
    // htmltidy must be enabled in configuration and the extension must be
    // available; otherwise bypass. (The previous check was inverted: it
    // bypassed when tidy was enabled and went on to instantiate tidy when the
    // extension was missing.)
    $enabled = isset($this->config['htmltidy']['enabled'])
        && (int) $this->config['htmltidy']['enabled'] === 1;

    if (!$enabled || !extension_loaded('tidy')) {
        return;
    }

    // get output from response
    $content = $response->getContent();

    $tidyoptions = [
        'clean' => true,
        'doctype' => 'transitional',
        'output-xhtml' => true,
        'drop-proprietary-attributes' => true,
        'lower-literals' => true,
        'show-body-only' => false,
        'indent-spaces' => 4,
        'wrap' => 130,
        'indent' => 'auto',
    ];

    // tidy the output
    $tidy = new tidy();
    $tidy->parseString($content, $tidyoptions, 'utf8');
    $tidy->cleanRepair();

    // @todo diagnose? errorreport?

    // set output to response
    $response->setContent(tidy_get_output($tidy), true);
}
/**
 * tidy the data
 *
 * Returns the data unchanged when the tidy extension is not available.
 * Otherwise the data is parsed, repaired and returned, using the tidy 1.0
 * procedural API when tidy_setopt() exists and the parameters are an array,
 * and the resource-based API in every other case.
 *
 * @access public
 * @param  string $data markup to tidy
 * @return string tidied markup
 */
function apply($data)
{
    if (!function_exists('tidy_parse_string')) {
        return $data;
    }

    $useTidyOne = function_exists('tidy_setopt') && is_array($this->_params);

    if (!$useTidyOne) {
        $document = tidy_parse_string($data, $this->_params);
        tidy_clean_repair($document);

        return tidy_get_output($document);
    }

    /**
     * tidy 1.0
     */
    foreach ($this->_params as $name => $setting) {
        tidy_setopt($name, $setting);
    }
    tidy_parse_string($data);
    tidy_clean_repair();

    return tidy_get_output();
}
/**
 * Convert the supported BBCode tags in a message to HTML.
 *
 * Simple tags are translated literally (and case-sensitively); [img] and [url]
 * are matched case-insensitively. When the tidy extension is available the
 * result is additionally repaired into an XHTML body fragment.
 *
 * NOTE(review): "[li]" is translated but "[/li]" is not, so a literal "[/li]"
 * stays in the output — confirm whether that is intended.
 *
 * @param string $message Message text containing BBCode.
 * @param bool   $nowrap  When false, Tidy wraps its output at 80 columns;
 *                        when true no wrap option is passed to Tidy.
 * @return string HTML markup.
 */
function return_parsed_bbcode($message, $nowrap = false)
{
    // never strip_tags here, see Page.Talks for details
    $simpleTags = array(
        "[b]"       => "<b>",
        "[/b]"      => "</b>",
        "[i]"       => "<i>",
        "[/i]"      => "</i>",
        "[u]"       => "<u>",
        "[/u]"      => "</u>",
        "[center]"  => "<div align=\"center\">",
        "[/center]" => "</div>",
        "[left]"    => "<div align=\"left\">",
        "[/left]"   => "</div>",
        "[right]"   => "<div align=\"right\">",
        "[/right]"  => "</div>",
        "[ol]"      => "<ol>",
        "[ul]"      => "<ul>",
        "[li]"      => "<li>",
        "[/ol]"     => "</ol>",
        "[/ul]"     => "</ul>",
        "[br]"      => "<br>",
    );
    $message = str_replace(array_keys($simpleTags), array_values($simpleTags), $message);

    // eregi_replace() no longer exists as of PHP 7; these are the PCRE
    // equivalents, with the "i" modifier standing in for eregi's
    // case-insensitivity.
    $message = preg_replace('#\[img\]([^\[]*)\[/img\]#i', "<img src=\"\\1\" border=\"0\">", $message);
    $message = preg_replace('#\[url\](https?://[^\[]*)\[/url\]#i', "<a href=\"\\1\">\\1</a>", $message);

    if (function_exists("tidy_get_output")) {
        $config = array('indent' => false, 'output-xhtml' => true, 'show-body-only' => true);
        if (!$nowrap) {
            $config['wrap'] = 80;
        }

        if (function_exists('tidy_setopt')) {
            // tidy 1.0: options and encoding are set globally before parsing.
            tidy_set_encoding('UTF8');
            foreach ($config as $key => $value) {
                tidy_setopt($key, $value);
            }
            tidy_parse_string($message);
            tidy_clean_repair();
            $message = tidy_get_output();
        } else {
            // tidy 2.0: configuration and encoding are passed to the parser.
            $tidy = tidy_parse_string($message, $config, 'UTF8');
            if ($tidy !== false) {
                tidy_clean_repair($tidy);
                $message = tidy_get_output($tidy);
            }
        }
    }

    return $message;
}
/**
 * Clean $this->html with HTML Tidy, using whichever tidy API is available.
 *
 * The object API is preferred; the procedural tidy 1.0 API is the fallback.
 * When neither is present a notice is printed and $this->html is left as is.
 * In both tidy branches $this->html ends up holding the tidied markup as a
 * string.
 *
 * @return void
 */
function TidyClean()
{
    if (class_exists('tidy')) {
        //PHP 5 only !!!
        $tidy = new tidy();
        $tidy->parseString($this->html, $this->TidyConfig, $this->Encoding);
        $tidy->cleanRepair();
        // Store the output string rather than the tidy object itself, so the
        // property has the same type as in the procedural branch below.
        $this->html = tidy_get_output($tidy);

        return;
    }

    if (function_exists('tidy_parse_string')) {
        //use procedural style for compatibility with PHP 4.3
        tidy_set_encoding($this->Encoding);
        foreach ($this->TidyConfig as $key => $value) {
            tidy_setopt($key, $value);
        }
        tidy_parse_string($this->html);
        tidy_clean_repair();
        $this->html = tidy_get_output();

        return;
    }

    print "<b>No tidy support. Please enable it in your php.ini.\r\nOnly basic cleaning is beeing applied\r\n</b>";
}
/**
 * Tidyfication of the strings
 *
 * Trims the string, runs it through Tidy and returns the repaired output,
 * optionally wrapped in "Tidy - Begin/End" marker comments. The input is
 * returned unchanged when the tidy class is not available, and an empty
 * string is returned for blank input.
 *
 * @param string $str
 * @return string
 */
public function tidyCleaner($str)
{
    eZDebug::accumulatorStart('eztidytemplateoperator', 'Tidy', 'Tidy template operator');

    // Every return path below stops the accumulator started above, so that
    // start and stop calls stay paired.
    if (!class_exists('tidy')) {
        eZDebug::writeError("phpTidy isn't installed", 'eZTidy::tidyCleaner()');
        eZDebug::accumulatorStop('eztidytemplateoperator');

        return $str;
    }

    $str = trim($str);
    if ($str == "") {
        eZDebug::accumulatorStop('eztidytemplateoperator');

        return "";
    }

    $this->tidy = new tidy();
    $this->tidy->parseString($str, $this->config, $this->options['charset']);
    $this->tidy->cleanRepair();
    $this->isTidyfied = true;
    $this->reportWarning();

    $output = tidy_get_output($this->tidy);
    if (strtolower($this->options['showTidyElement']) == 'enabled') {
        $output = "<!-- Tidy - Begin -->\n" . $output . "\n<!-- Tidy - End -->";
    }

    eZDebug::accumulatorStop('eztidytemplateoperator');

    return $output;
}
/**
 * Delivers a PDF file from XHTML
 *
 * The content is stripped of href attributes, embedded in the test print
 * template, converted to XML (through Tidy when available) and passed on to
 * processPrintoutput2FO() / deliverPDFfromFO().
 *
 * @param string $content The XHTML string
 * @param string $title   Passed through to deliverPDFfromFO()
 * @access public
 */
public function deliverPDFfromHTML($content, $title = NULL)
{
    $content = preg_replace("/href=\".*?\"/", "", $content);

    $printbody = new ilTemplate("tpl.il_as_tst_print_body.html", TRUE, TRUE, "Modules/Test");
    $printbody->setVariable("TITLE", ilUtil::prepareFormOutput($this->getTitle()));
    $printbody->setVariable("ADM_CONTENT", $content);
    $printbody->setCurrentBlock("css_file");
    $printbody->setVariable("CSS_FILE", $this->getTestStyleLocation("filesystem"));
    $printbody->parseCurrentBlock();
    $printbody->setCurrentBlock("css_file");
    $printbody->setVariable("CSS_FILE", ilUtil::getStyleSheetLocation("filesystem", "delos.css"));
    $printbody->parseCurrentBlock();
    $printoutput = $printbody->get();

    $html = str_replace("href=\"./", "href=\"" . ILIAS_HTTP_PATH . "/", $printoutput);
    $html = preg_replace("/<div id=\"dontprint\">.*?<\\/div>/ims", "", $html);

    if (extension_loaded("tidy")) {
        $config = array("indent" => false, "output-xml" => true, "numeric-entities" => true);
        $tidy = new tidy();
        $tidy->parseString($html, $config, 'utf8');
        $tidy->cleanRepair();
        $html = tidy_get_output($tidy);
        // Drop everything that precedes the <html> element.
        $html = preg_replace("/^.*?(<html)/", "\\1", $html);
    } else {
        // Without Tidy's "numeric-entities" conversion, replace the named
        // HTML entities by hand with forms that are valid in plain XML.
        $html = str_replace("&nbsp;", "&#160;", $html);
        $html = str_replace("&otimes;", "X", $html);
    }

    $html = preg_replace("/src=\".\\//ims", "src=\"" . ILIAS_HTTP_PATH . "/", $html);

    $this->deliverPDFfromFO($this->processPrintoutput2FO($html), $title);
}
$node->appendChild($dom->createTextNode('.1')); //hack to trigger an update for 4.1.10 release due translations not being loaded on updates } } $results = $xpath->query('//*[@locale]'); for ($i = 0; $i < $results->length; $i++) { $results->item($i)->setAttribute('locale', $locale); } $out = $dom->saveXML(); if (function_exists('tidy_get_output')) { $tidy = new tidy(); $tidy_config = array('input-xml' => true, 'output-xml' => true, 'indent' => true, 'wrap' => 0); $tidy->isXML(); $tidy->parseString($out, $tidy_config, 'UTF8'); $tidy->cleanRepair(); $out = tidy_get_output($tidy); } if ($booleans['create_archive']) { $cwd = realpath(getcwd()); $c_path = realpath($configs[$module]); if (strpos($c_path, $cwd) !== 0) { I2CE::raiseError("Cannot determine module sub-directory structure for {$module}", E_USER_ERROR); } $target_dir = $archive_dir . DIRECTORY_SEPARATOR . 'files' . DIRECTORY_SEPARATOR . $locale . DIRECTORY_SEPARATOR . substr($c_path, strlen($cwd)) . DIRECTORY_SEPARATOR . $locale . DIRECTORY_SEPARATOR; } else { $target_dir = $configs[$module] . DIRECTORY_SEPARATOR . $locale . DIRECTORY_SEPARATOR; } if (!is_dir($target_dir)) { if (!mkdir($target_dir, 0775, true)) { I2CE::raiseError("Could not created {$target_dir}", E_USER_ERROR); }
/**
 * Convert HTML tag soup into XHTML that a plain XML parser can read.
 *
 * @param string $htmlTagSoup Markup to convert.
 * @return string The tidied XHTML with every "&nbsp;" entity replaced by its
 *                numeric equivalent "&#160;".
 */
function tidyToXml($htmlTagSoup)
{
    // Create the Tidy object
    $tidy = new Tidy();

    // Parse the HTML into memory, turning on the option to convert to
    // XHTML as part of the tidying process
    $tidy->parseString($htmlTagSoup, array('output-xhtml' => true));

    // Do the tidying
    $tidy->cleanRepair();

    // And get the tidied version as a string
    $tidied_xml = tidy_get_output($tidy);

    // Opinions seem to differ as to whether the non-breaking space
    // entity '&nbsp;' is predeclared as part of XHTML. Tidy thinks it
    // is, and so leaves it alone, while the XML parser we're about to
    // use on this string thinks otherwise. So replace any occurrences
    // of it with its numeric equivalent (which doesn't need to be
    // declared).
    return str_replace('&nbsp;', '&#160;', $tidied_xml);
}
fwrite($pipes[0], $source); fclose($pipes[0]); // Read clean source out to the browser while (!feof($pipes[1])) { //echo fgets($pipes[1], 1024); $newsrc .= fgets($pipes[1], 1024); } fclose($pipes[1]); // Clean up after ourselves proc_close($process); } else { /* Use tidy if it's available from PECL */ if (function_exists('tidy_parse_string')) { $tempsrc = tidy_parse_string($source); tidy_clean_repair(); $newsrc = tidy_get_output(); } else { // Better give them back what they came with, so they don't lose it all... $newsrc = "<body>\n" . $source . "\n</body>"; } } // Split our source into an array by lines $srcLines = preg_split("/\n/", $newsrc, -1, PREG_SPLIT_NO_EMPTY); // Get only the lines between the body tags $startLn = 0; while (strpos($srcLines[$startLn++], '<body') === false && $startLn < sizeof($srcLines)) { } $endLn = $startLn; while (strpos($srcLines[$endLn++], '</body') === false && $endLn < sizeof($srcLines)) { } $srcLines = array_slice($srcLines, $startLn, $endLn - $startLn - 1);
/**
 * Clean up an HTML string with Tidy when the extension is available.
 *
 * The API is chosen by capability rather than by PHP version number: the
 * object API whenever the tidy class exists, the tidy 1.0 procedural API when
 * only tidy_setopt() exists. Previously only PHP 4 and PHP 5 were recognised,
 * which left the result undefined on any later version.
 *
 * @param string $input_string Markup to clean.
 * @return string The cleaned markup, or the input unchanged when Tidy is not
 *                usable.
 */
function tidy_html($input_string)
{
    // Detect if Tidy is configured
    if (!function_exists('tidy_get_release')) {
        # Tidy not configured for this computer
        return $input_string;
    }

    # Tidy object API (PHP 5 and later)
    if (class_exists('tidy')) {
        $config = array('uppercase-attributes' => true, 'wrap' => 800);
        $tidy = new tidy();
        $tidy->parseString($input_string, $config, 'utf8');
        $tidy->cleanRepair();

        return tidy_get_output($tidy);
    }

    # Tidy 1.0 procedural API (PHP 4)
    if (function_exists('tidy_setopt')) {
        tidy_setopt('uppercase-attributes', TRUE);
        tidy_setopt('wrap', 800);
        tidy_parse_string($input_string);

        return tidy_get_output();
    }

    return $input_string;
}
/**
 * Convert a print output to XSL-FO
 *
 * The markup is first converted to XML (through Tidy when available) and
 * then transformed with the question2fo.xsl stylesheet. The transformation
 * result is also written to the log.
 *
 * @param string $print_output The print output
 * @return string XSL-FO code
 * @access public
 */
function processPrintoutput2FO($print_output)
{
    global $ilLog;

    if (extension_loaded("tidy")) {
        $config = array("indent" => false, "output-xml" => true, "numeric-entities" => true);
        $tidy = new tidy();
        $tidy->parseString($print_output, $config, 'utf8');
        $tidy->cleanRepair();
        $print_output = tidy_get_output($tidy);
        // Drop everything that precedes the <html> element.
        $print_output = preg_replace("/^.*?(<html)/", "\\1", $print_output);
    } else {
        // Without Tidy's "numeric-entities" conversion, replace the named
        // HTML entities by hand with forms that are valid in plain XML.
        $print_output = str_replace("&nbsp;", "&#160;", $print_output);
        $print_output = str_replace("&otimes;", "X", $print_output);
    }

    $xsl = file_get_contents("./Modules/Survey/xml/question2fo.xsl");

    // additional font support
    $xsl = str_replace(
        'font-family="Helvetica, unifont"',
        'font-family="' . $GLOBALS['ilSetting']->get('rpc_pdf_font', 'Helvetica, unifont') . '"',
        $xsl
    );

    $args = array('/_xml' => $print_output, '/_xsl' => $xsl);
    $xh = xslt_create();
    $params = array();
    $output = xslt_process($xh, "arg:/_xml", "arg:/_xsl", NULL, $args, $params);
    xslt_error($xh);
    xslt_free($xh);

    $ilLog->write($output);

    return $output;
}
/**
 * Run the text through the in-process tidy extension instead of spawning an
 * external tidy process.
 *
 * 'pear install tidy' should be able to compile the extension module.
 *
 * Returns the repaired markup, or null when tidy reports a fatal error.
 *
 * @private
 * @static
 */
function internalTidy($text)
{
    global $wgTidyConf, $IP;

    $profileSection = 'Parser::internalTidy';
    wfProfileIn($profileSection);

    $parser = new tidy();
    $parser->parseString($text, $wgTidyConf, 'utf8');
    $parser->cleanRepair();

    // A status of 2 is tidy's code for a fatal error, see
    // http://www.php.net/manual/en/function.tidy-get-status.php
    $result = ($parser->getStatus() == 2) ? null : tidy_get_output($parser);

    wfProfileOut($profileSection);

    return $result;
}
/**
 * Use the HTML tidy PECL extension to use the tidy library in-process,
 * saving the overhead of spawning a new process.
 *
 * 'pear install tidy' should be able to compile the extension module.
 *
 * Returns the repaired markup, or null when tidy reports a fatal error. With
 * $wgDebugTidy set, tidy's report is appended to successful output as an HTML
 * comment.
 *
 * @private
 * @static
 */
function internalTidy($text)
{
    global $wgTidyConf, $wgDebugTidy;

    $fname = 'Parser::internalTidy';
    wfProfileIn($fname);

    $tidy = new tidy();
    $tidy->parseString($text, $wgTidyConf, 'utf8');
    $tidy->cleanRepair();

    $status = $tidy->getStatus();
    if ($status == 2) {
        // 2 is magic number for fatal error
        // http://www.php.net/manual/en/function.tidy-get-status.php
        $cleansource = null;
    } else {
        $cleansource = tidy_get_output($tidy);
        // The report is only attached to real output: appending it to the
        // null of a fatal error would turn the failure into a non-null string.
        if ($wgDebugTidy && $status > 0) {
            // Escape comment terminators so the report cannot close the
            // surrounding HTML comment early.
            $cleansource .= "<!--\nTidy reports:\n" .
                str_replace('-->', '--&gt;', $tidy->errorBuffer) .
                "\n-->";
        }
    }

    wfProfileOut($fname);

    return $cleansource;
}
/**
 * Use HTML Tidy to validate the $text
 *
 * Unless $ignore_config is set, nothing is done (and true is returned) when
 * $config['HTML_Tidy'] is empty or 'off'.
 *
 * @param string $text The html content to be checked. Passed by reference and
 *   replaced with the repaired markup on success
 * @param bool $ignore_config Run Tidy regardless of $config['HTML_Tidy']
 * @return bool False when Tidy is not available, the text could not be parsed
 *   or Tidy reports a fatal error; true otherwise
 */
static function tidyFix(&$text, $ignore_config = false)
{
    global $config;

    if (!$ignore_config) {
        if (empty($config['HTML_Tidy']) || $config['HTML_Tidy'] == 'off') {
            return true;
        }
    }

    if (!function_exists('tidy_parse_string')) {
        return false;
    }

    $options = array();
    // keeps tidy from wrapping... want the least amount of space changing as possible
    $options['wrap'] = 0;
    // omit, auto, strict, transitional, user
    $options['doctype'] = 'omit';
    // drop empty paragraphs
    $options['drop-empty-paras'] = true;
    // need this so that <br> will be <br/> .. etc
    $options['output-xhtml'] = true;
    $options['show-body-only'] = true;
    $options['hide-comments'] = false;

    //
    // php4
    //
    if (function_exists('tidy_setopt')) {
        $options['char-encoding'] = 'utf8';
        // NOTE(review): presumably applies $options through tidy_setopt() — confirm.
        gp_edit::tidyOptions($options);
        tidy_parse_string($text);
        tidy_clean_repair();

        // 2 is magic number for fatal error
        // http://www.php.net/manual/en/function.tidy-get-status.php
        if (tidy_get_status() === 2) {
            return false;
        }

        $text = tidy_get_output();

        return true;
    }

    //
    // php5
    //
    $tidy = tidy_parse_string($text, $options, 'utf8');
    if ($tidy === false) {
        // The text could not be parsed; report failure instead of passing
        // false on to the tidy functions below.
        return false;
    }
    tidy_clean_repair($tidy);

    // 2 is magic number for fatal error
    // http://www.php.net/manual/en/function.tidy-get-status.php
    if (tidy_get_status($tidy) === 2) {
        return false;
    }

    $text = tidy_get_output($tidy);

    return true;
}
<?php
/*
 * cleanhtml.php
 *
 * A simple script to clean and repair HTML,XHTML,PHP,ASP,etc. documents
 * if no file is provided, it reads from standard input.
 *
 * NOTE: Works only with tidy for PHP 4.3.x, for tidy in PHP 5 see cleanhtml5.php
 *
 * By: John Coggeshall <*****@*****.**>
 *
 * Usage: php cleanhtml.php [filename]
 *
 */

// Parse the file named on the command line, or standard input when no
// file name was given.
if (isset($_SERVER['argv'][1])) {
    tidy_parse_file($_SERVER['argv'][1]);
} else {
    $stdinMarkup = file_get_contents("php://stdin");
    tidy_parse_string($stdinMarkup);
}

tidy_clean_repair();

// Report anything tidy complained about before printing the result.
$hasProblems = tidy_warning_count() || tidy_error_count();
if ($hasProblems) {
    echo "\n\nThe following errors or warnings occurred:\n";
    echo tidy_get_error_buffer();
    echo "\n";
}

echo tidy_get_output();
/**
 * Use the HTML tidy extension to use the tidy library in-process,
 * saving the overhead of spawning a new process.
 *
 * @param string $text HTML to check
 * @param bool $stderr Whether to read result from error status instead of output
 * @param int &$retval Exit code (-1 on internal error)
 * @return string|null The tidy error buffer when $stderr is set; otherwise the
 *   repaired markup, or null when tidy is unavailable or reports a fatal error.
 */
private static function phpClean($text, $stderr = false, &$retval = null)
{
    global $wgTidyConf, $wgDebugTidy;

    // Availability is probed differently under HHVM than under regular PHP.
    if ((!wfIsHHVM() && !class_exists('tidy'))
        || (wfIsHHVM() && !function_exists('tidy_repair_string'))
    ) {
        wfWarn("Unable to load internal tidy class.");
        $retval = -1;

        return null;
    }

    $tidy = new tidy();
    $tidy->parseString($text, $wgTidyConf, 'utf8');

    if ($stderr) {
        $retval = $tidy->getStatus();

        return $tidy->errorBuffer;
    }

    $tidy->cleanRepair();
    $retval = $tidy->getStatus();
    if ($retval == 2) {
        // 2 is magic number for fatal error
        // http://www.php.net/manual/en/function.tidy-get-status.php
        return null;
    }

    $cleansource = tidy_get_output($tidy);
    if ($wgDebugTidy && $retval > 0) {
        // Escape comment terminators in the report so that it cannot close
        // the surrounding HTML comment early.
        $cleansource .= "<!--\nTidy reports:\n" .
            str_replace('-->', '--&gt;', $tidy->errorBuffer) .
            "\n-->";
    }

    return $cleansource;
}
} if (check_for_add_field($line)) { continue; } if (check_for_set_map($line)) { continue; } } } $text = $config->saveXML($topNode); $tidy = new tidy(); $config = array('input-xml' => true, 'output-xml' => true, 'indent' => true, 'wrap' => 0); $tidy->isXML(); $tidy->parseString($text, $config, 'UTF8'); $tidy->cleanRepair(); file_put_contents("config_formClass.xml", tidy_get_output($tidy) . "\n"); function check_for_add_field($line) { global $config; global $red; global $black; global $classNode; global $fieldsNode; if (!preg_match('/^\\s*\\$this->addField\\(\\s*(.*)\\s*\\);\\s*$/', $line, $matches)) { if (preg_match('/addField/', $line)) { echo "{$red}Found addField() but don't know how to deal with it at:{$black}\n\t{$line}\n"; } return false; } $fields = explode(',', $matches[1]); //we hope that there are no commas in the names
/**
 * Repair a markup fragment with Tidy and return it as XHTML body content.
 *
 * @param string $node Markup to repair; parsed as UTF-8.
 * @return string The repaired markup, or the literal text
 *                "Error - cannot repair" when the tidy class is unavailable.
 */
private function repairHtml($node)
{
    if (!class_exists('tidy')) {
        return "Error - cannot repair";
    }

    $options = array(
        'clean'          => true,
        'output-xhtml'   => true,
        'show-body-only' => true,
        'input-xml'      => true,
    );

    $repairer = new tidy();
    $repairer->parseString($node, $options, 'utf8');
    $repairer->cleanRepair();
    $repaired = tidy_get_output($repairer);

    // Diagnostics run after the output has been captured; the debug line
    // below can be re-enabled to inspect the resulting error buffer.
    $repairer->diagnose();
    // SPConfig::debOut( $tidy->errorBuffer );

    return $repaired;
}
/**
 * Re-indent HTML with Tidy.
 *
 * After formatting, a closing tag that Tidy placed on its own line directly
 * after another tag is pulled back onto that tag's line. The markup is
 * returned untouched when the tidy class is not available.
 *
 * @param string $html Markup to indent.
 * @return string Indented markup, or the input when Tidy is unavailable.
 */
public function htmlIndentation($html)
{
    if (!class_exists('tidy')) {
        return $html;
    }

    $options = array(
        'char-encoding'   => 'utf8',
        'vertical-space'  => false,
        'indent'          => true,
        'wrap'            => 0,
        'word-2000'       => 1,
        'break-before-br' => true,
        'indent-cdata'    => true,
    );

    $formatter = new \Tidy();
    $formatter->parseString($html, $options);
    $formatted = tidy_get_output($formatter);

    return str_replace('>' . PHP_EOL . '</', '></', $formatted);
}
/**
 * Use the HTML tidy PECL extension to use the tidy library in-process,
 * saving the overhead of spawning a new process.
 *
 * 'pear install tidy' should be able to compile the extension module.
 *
 * @param string $text HTML to check
 * @param bool $stderr When set, tidy's error buffer is returned instead of
 *   the repaired markup
 * @param int &$retval Receives tidy's status code
 * @return string|null The error buffer when $stderr is set; otherwise the
 *   repaired markup, or null when tidy reports a fatal error
 */
private static function execInternalTidy($text, $stderr = false, &$retval = null)
{
    global $wgTidyConf, $wgDebugTidy;

    wfProfileIn(__METHOD__);

    $tidy = new tidy();
    $tidy->parseString($text, $wgTidyConf, 'utf8');

    if ($stderr) {
        $retval = $tidy->getStatus();
        // Close the profiling section on this path as well, so that every
        // wfProfileIn() is matched by a wfProfileOut().
        wfProfileOut(__METHOD__);

        return $tidy->errorBuffer;
    }

    $tidy->cleanRepair();
    $retval = $tidy->getStatus();
    if ($retval == 2) {
        // 2 is magic number for fatal error
        // http://www.php.net/manual/en/function.tidy-get-status.php
        $cleansource = null;
    } else {
        $cleansource = tidy_get_output($tidy);
        // The report is only attached to real output: appending it to the
        // null of a fatal error would turn the failure into a non-null string.
        if ($wgDebugTidy && $retval > 0) {
            // Escape comment terminators so the report cannot close the
            // surrounding HTML comment early.
            $cleansource .= "<!--\nTidy reports:\n" .
                str_replace('-->', '--&gt;', $tidy->errorBuffer) .
                "\n-->";
        }
    }

    wfProfileOut(__METHOD__);

    return $cleansource;
}
$datetext = str_replace('juliol', 'July', $datetext); $datetext = str_replace('agost', 'August', $datetext); $datetext = str_replace('setembre', 'September', $datetext); $datetext = str_replace('octubre', 'October', $datetext); $datetext = str_replace('novembre', 'November', $datetext); $datetext = str_replace('desembre', 'December', $datetext); $date = date_create_from_format('F d, Y H:i:s', $datetext . ' 00:00:00'); $newItem->setDate($date->format('Y-m-d H:i:s')); //Now add the feed item $TestFeed->addItem($newItem); $feed_count++; } $texts = $html->find('text'); $go_on = FALSE; foreach ($texts as $text) { if ($text->plaintext == '« Older Entries') { //Not sleeping, Wordpress.com does not appear to be rate-limited $html_text = file_get_contents($text->parent->href) or exit(1); $tidy = tidy_parse_string($html_text, $tidy_config, 'UTF8'); tidy_clean_repair($tidy); $html = str_get_html(tidy_get_output($tidy)); $go_on = TRUE; break; } } } if ($feed_count == 0) { //No error but no feeds, this is wrong exit(1); } $TestFeed->generateFeed();
<?php
// Load the example document, let Tidy repair it and print the repaired markup.
$document = tidy_parse_file("intro2_ex1.html");

tidy_clean_repair($document);

$repairedMarkup = tidy_get_output($document);
echo $repairedMarkup;