/**
 * Gathers the advertisement links from the remote page.
 *
 * Requests $post['url'] through the HTTP client, optionally repairs the
 * markup with Tidy, and collects every anchor href that points at the
 * advertiser host given in $post['ad'] (with an optional sub-domain).
 *
 * @param array $post The post data submitted by the form; the keys 'url'
 *                    and 'ad' are read.
 * @return array The matching href values, in document order.
 */
public function fetchAds($post)
{
    $this->_client->setUri($post['url']);
    $response = $this->_client->request('GET')->getBody();

    /**
     * If the tidy class exists, attempt to clean up the markup returned by
     * the remote site before handing it to the DOM parser.
     */
    if (class_exists('tidy')) {
        $tidy = new Tidy('/dev/null', array(
            'indent' => true,
            'tab-size' => 4,
            'output-encoding' => 'utf8',
            'newline' => 'LF',
            'output-xhtml' => true,
        ), 'utf8');
        $tidy->parseString($response);
        $tidy->cleanRepair();
        $response = $tidy->value;
    }

    # An empty body cannot contain links, and DOMDocument::loadHTML()
    # rejects an empty string, so stop here.
    if ((string) $response === '') {
        return array();
    }

    $xml = new DOMDocument('1.0', 'utf-8');
    $xml->loadHTML($response);

    # The advertiser name is user input: escape it so that characters such
    # as "." or "/" are matched literally instead of altering the pattern.
    # The pattern does not change per link, so it is built once.
    $pattern = '/^http:\/\/([a-z\-]+\.)?' . preg_quote($post['ad'], '/') . '.*$/i';

    $result = array();
    # For each of the anchor links in the page,
    foreach ($xml->getElementsByTagName('a') as $a) {
        # get its target HREF
        $href = $a->getAttribute('href');
        if (preg_match($pattern, $href)) {
            # The link points at the advertising site: keep it.
            $result[] = $href;
        }
    }

    return $result;
}
/**
 * Applies the configured XSLT stylesheet to an XML source and returns the
 * result as XHTML-compatible markup.
 *
 * @param $xml XML DOM tree, or a string holding the filename to load
 * @return string transformed output, passed through Tidy when that class exists
 * @todo Rename to transformToXHTML, as transformToHTML is misleading
 */
public function transformToHTML($xml)
{
    $document = $xml;
    if (is_string($xml)) {
        // a string is treated as a path to the XML file
        $document = new DOMDocument();
        $document->load($xml);
    }

    $markup = $this->xsltProcessor->transformToXML($document);

    // Backwards-compatibility fudges; the document is assumed to be XHTML.
    // Replacements run in order: "<br/>" becomes "<br />", then the empty
    // xmlns attribute is dropped.
    $markup = str_replace(array('/>', ' xmlns=""'), array(' />', ''), $markup);

    if (!class_exists('Tidy')) {
        return $markup;
    }

    // pretty-print the output
    $tidy = new Tidy();
    $tidy->parseString(
        $markup,
        array('indent' => true, 'output-xhtml' => true, 'wrap' => 80),
        'utf8'
    );
    $tidy->cleanRepair();

    return (string) $tidy;
}
/**
 * Serialises a list of tokens into an HTML string.
 *
 * @param $tokens Array of HTMLPurifier_Token
 * @return Generated HTML ('' when no tokens are given)
 */
public function generateFromTokens($tokens)
{
    if (!$tokens) {
        return '';
    }

    $html = '';
    $total = count($tokens);
    $pos = 0;
    while ($pos < $total) {
        // Script special case: start tag, ONE content token, end tag.
        $isScriptTriple = $this->_scriptFix
            && $tokens[$pos]->name === 'script'
            && $pos + 2 < $total
            && $tokens[$pos + 2] instanceof HTMLPurifier_Token_End;

        if ($isScriptTriple) {
            $html .= $this->generateFromToken($tokens[$pos]);
            $html .= $this->generateScriptFromToken($tokens[$pos + 1]);
            $pos += 2;
        }

        // the current token (the end tag in the script case)
        $html .= $this->generateFromToken($tokens[$pos]);
        $pos++;
    }

    // Optional Tidy formatting
    if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) {
        $formatter = new Tidy();
        $formatter->parseString(
            $html,
            array(
                'indent' => true,
                'output-xhtml' => $this->_xhtml,
                'show-body-only' => true,
                'indent-spaces' => 2,
                'wrap' => 68,
            ),
            'utf8'
        );
        $formatter->cleanRepair();
        $html = (string) $formatter; // explicit cast necessary
    }

    // Convert "\n" to the configured newline, defaulting to the platform's
    $newline = $this->config->get('Output.Newline');
    $newline = ($newline === null) ? PHP_EOL : $newline;
    if ($newline !== "\n") {
        $html = str_replace("\n", $newline, $html);
    }

    return $html;
}
/**
 * Normalises $content to XHTML and loads it into $this->simpleXML.
 *
 * The tidy PHP extension is preferred; if it is not loaded, a `tidy`
 * binary on the PATH is used; if neither is available the content is
 * parsed as-is.
 *
 * @param string $content Markup to parse
 * @throws Exception when the resulting markup cannot be loaded as XML
 */
public function __construct($content)
{
    if (extension_loaded('tidy')) {
        // using the tidy php extension
        $tidy = new Tidy();
        $tidy->parseString(
            $content,
            array('output-xhtml' => true, 'numeric-entities' => true, 'wrap' => 99999),
            'utf8'
        );
        $tidy->cleanRepair();
        $markup = (string) $tidy;
        $markup = str_replace('xmlns="http://www.w3.org/1999/xhtml"', '', $markup);
        // With numeric-entities on, non-breaking spaces are written as the
        // entity &#160;. Strip that entity, not whitespace characters:
        // removing plain spaces would also delete the separators between
        // tag names and attributes.
        $markup = str_replace('&#160;', '', $markup);
    } elseif (@shell_exec('which tidy')) {
        // using tidy through the command line (-n: numeric entities)
        $CLI_content = escapeshellarg($content);
        $markup = (string) `echo {$CLI_content} | tidy -n -q -utf8 -asxhtml 2> /dev/null`;
        $markup = str_replace('xmlns="http://www.w3.org/1999/xhtml"', '', $markup);
        $markup = str_replace('&#160;', '', $markup);
    } else {
        // no tidy library found, hence no sanitizing
        $markup = $content;
    }

    $this->simpleXML = @simplexml_load_string($markup, 'SimpleXMLElement', LIBXML_NOWARNING);
    if (!$this->simpleXML) {
        throw new Exception('CSSContentParser::__construct(): Could not parse content.' . ' Please check the PHP extension tidy is installed.');
    }

    parent::__construct();
}
/**
 * Normalises $content to XHTML and wraps it in a SimpleXMLElement stored
 * in $this->simpleXML.
 *
 * The tidy PHP extension is preferred; if it is not loaded, a `tidy`
 * binary on the PATH is used; if neither is available the content is
 * parsed as-is.
 *
 * @param string $content Markup to parse
 */
function __construct($content)
{
    if (extension_loaded('tidy')) {
        // using the tidy php extension
        $tidy = new Tidy();
        $tidy->parseString(
            $content,
            array(
                'output-xhtml' => true,
                'numeric-entities' => true,
            ),
            'utf8'
        );
        $tidy->cleanRepair();
        $markup = (string) $tidy;
        $markup = str_replace('xmlns="http://www.w3.org/1999/xhtml"', '', $markup);
        // With numeric-entities on, non-breaking spaces are written as the
        // entity &#160;. Strip that entity, not whitespace characters:
        // removing plain spaces would also delete the separators between
        // tag names and attributes.
        $markup = str_replace('&#160;', '', $markup);
    } elseif (`which tidy`) {
        // using tidy through the command line (-n: numeric entities)
        $CLI_content = escapeshellarg($content);
        $markup = (string) `echo $CLI_content | tidy -n -q -utf8 -asxhtml 2> /dev/null`;
        $markup = str_replace('xmlns="http://www.w3.org/1999/xhtml"', '', $markup);
        $markup = str_replace('&#160;', '', $markup);
    } else {
        // no tidy library found, hence no sanitizing
        $markup = $content;
    }

    $this->simpleXML = new SimpleXMLElement($markup);
}
/**
 * Parses the response content with Tidy and fails when Tidy reports
 * anything in its error buffer.
 *
 * @see ExtensionInterface
 * @throws \Exception carrying the Tidy error buffer as its message
 */
public function apply(Response $response)
{
    $validator = new \Tidy();
    $validator->parseString($response->getContent());

    $problems = $validator->errorBuffer;
    if ($problems) {
        throw new \Exception($problems);
    }
}
/**
 * Runs the given markup through Tidy in XML-in / XML-out mode, without
 * line wrapping, and returns the repaired document.
 *
 * @param string $content
 * @return string
 */
protected function _tidyFix($content)
{
    $repairer = new Tidy();
    $repairer->parseString(
        $content,
        [
            'input-xml' => true,
            'output-xml' => true,
            'wrap' => false,
        ],
        'utf8'
    );
    $repairer->cleanRepair();

    return (string) $repairer;
}
/**
 * Re-indents HTML responses with Tidy; any other format is returned as is.
 *
 * @param array|\ArrayAccess $request Read for its '_format' entry
 * @param string $response Response body
 * @return string
 */
public static function beforeResponse($request, $response)
{
    if ($request['_format'] != 'html') {
        return $response;
    }

    $formatter = new \Tidy();
    $formatter->parseString($response, array('wrap' => 200, 'indent' => true), 'utf8');
    $formatter->cleanRepair();

    // serialise from the <html> node of the repaired document
    $root = $formatter->html();

    return $root->value;
}
/**
 * Converts the markup to UTF-8, repairs it with Tidy (comments hidden),
 * then post-processes the meta content-type and doctype via the sibling
 * helpers.
 *
 * @param string $html
 * @param string|null $charset Forwarded to toUTF8()
 * @param string|null $charset_hint Forwarded to toUTF8()
 * @return string
 */
public function formatHtml($html, $charset = null, $charset_hint = null)
{
    $utf8Html = $this->toUTF8($html, $charset, $charset_hint);

    $cleaner = new Tidy();
    $cleaner->parseString($utf8Html, array("hide-comments" => true), 'UTF8');
    $cleaner->cleanRepair();

    $repaired = (string) $cleaner;

    return $this->formatDocType(
        $this->moveMetaContentTypeToTop($repaired)
    );
}
/**
 * Tidies the given HTML and loads the result into a DOM document.
 *
 * Stores the raw input in $this->html_code, the tidied markup in
 * $this->tidy_code and the parsed, normalised document in $this->dom.
 * When the tidied markup cannot be parsed as XML, $this->dom is set to
 * null and an E_USER_ERROR is triggered.
 *
 * @param string $html_code
 */
function parse_html($html_code)
{
    $this->html_code = $html_code;

    // Tidy HTML code
    $tidy = new Tidy();
    $tidy->parseString($html_code, $this->tidy_config, 'utf8');
    $tidy->cleanRepair();
    $this->tidy_code = $tidy->value;

    // loadXML() is an instance method; load into a fresh document and
    // check the outcome BEFORE touching the document, so that a parse
    // failure reaches the error report below instead of a fatal
    // "call to a member function on false".
    $dom = new DOMDocument();
    $xml = (string) $this->tidy_code;
    if ($xml === '' || !$dom->loadXML($xml)) {
        $this->dom = null;
        trigger_error("Unable to parse XML Document!", E_USER_ERROR);
        return;
    }

    $dom->normalizeDocument();
    $this->dom = $dom;
}
/**
 * Rewrites every table wrapped in a div.rvps14 / div.rvps8: header spans
 * become <b class="table-header">, empty headers are removed, and the
 * table is re-indented with Tidy.
 *
 * @param string $text
 * @return string
 */
public function formatTables($text)
{
    $rebuildTable = function ($matches) {
        $headerPattern = '%(?:<p class="rvps(?:1|4|14)">)?<span class="rvts(?:9|15|23)">\\s*(.*?)\\s*</span>(?:</p>)?%u';

        $table = preg_replace(
            $headerPattern,
            '<b class="table-header">$1</b>',
            '<table>' . $matches[1] . '</table>'
        );
        // drop headers that only held a line break
        $table = preg_replace('%<b class="table-header"><br></b>%u', '', $table);

        // Class pairings noted by the original author:
        // rvps14 - rvps14
        // rvps14 - rvps11
        // rvps4 - rvps15
        $formatter = new \Tidy();
        $formatter->parseString(
            $table,
            array(
                'clean' => true,
                'output-html' => true,
                'show-body-only' => true,
                'wrap' => 0,
                'indent' => true,
            ),
            'utf8'
        );
        $formatter->cleanRepair();

        return $formatter . "\n";
    };

    return preg_replace_callback(
        '%<div class="rvps(?:14|8)">\\n*<table.*?>([\\s\\S]*?)</table>\\n*</div>%u',
        $rebuildTable,
        $text
    );
}
/**
 * Reads from the wrapped input and returns it cleaned by Tidy.
 *
 * @param null $len Forwarded to the wrapped reader's read()
 *
 * @throws BuildException when the Tidy class is not available
 * @return mixed the Tidy output for the data read, or -1 if the wrapped
 *               reader signalled the end of the stream
 */
public function read($len = null)
{
    if (!class_exists('Tidy')) {
        throw new BuildException("You must enable the 'tidy' extension in your PHP configuration in order to use the Tidy filter.");
    }

    // one-time lazy set-up
    if (!$this->getInitialized()) {
        $this->_initialize();
        $this->setInitialized(true);
    }

    $chunk = $this->in->read($len);
    if ($chunk === -1) {
        return -1;
    }

    $options = $this->getDistilledConfig();

    $cleaner = new Tidy();
    $cleaner->parseString($chunk, $options, $this->encoding);
    $cleaner->cleanRepair();

    return tidy_get_output($cleaner);
}
/**
 * Applies the configured XSLT stylesheet to an XML tree and returns HTML.
 *
 * @param $xml XML DOM tree
 * @return string transformed output, passed through Tidy when that class exists
 */
public function transformToHTML($xml)
{
    $html = $this->xsltProcessor->transformToXML($xml);

    // Fudges for HTML backwards compatibility, applied in this order.
    $fudges = array(
        '/>' => ' />',                                  // <br /> not <br/>
        ' xmlns=""' => '',                              // rm unnecessary xmlns
        ' xmlns="http://www.w3.org/1999/xhtml"' => '',  // rm unnecessary xmlns
    );
    foreach ($fudges as $search => $replacement) {
        $html = str_replace($search, $replacement, $html);
    }

    if (class_exists('Tidy')) {
        // cleanup output
        $tidy = new Tidy();
        $tidy->parseString(
            $html,
            array('indent' => true, 'output-xhtml' => true, 'wrap' => 80),
            'utf8'
        );
        $tidy->cleanRepair();
        $html = (string) $tidy;
    }

    return $html;
}
/**
 * Compiles the template and echoes it, optionally parsed by Tidy first.
 *
 * In debug mode the response is sent as text/plain. For non-AJAX requests
 * two HTML comments (generation time and framework version) are appended.
 */
public function generateResponse()
{
    TemplateEngine::compile();

    $useTidy = Gravel::$config['gravel']['tidy_html'] && class_exists('Tidy');
    if ($useTidy) {
        $output = new \Tidy();
        // NOTE(review): 'output-xhtml' is given as the string 'false', not a
        // boolean - confirm this is intended.
        $output->parseString(
            TemplateEngine::$data['compiled'],
            [
                'indent' => 1,
                'indent-spaces' => 4,
                'output-xhtml' => 'false',
                'wrap' => 0,
                'hide-comments' => 0,
            ]
        );
    } else {
        $output = TemplateEngine::$data['compiled'];
    }

    if (Gravel::$config['gravel']['debug_mode']) {
        header("Content-Type: text/plain");
    }

    echo $output;

    // Debug info is only appended when this is not an ajax request.
    $isAjax = isset($_SERVER['HTTP_X_REQUESTED_WITH'])
        && $_SERVER['HTTP_X_REQUESTED_WITH'] === 'XMLHttpRequest';
    if ($isAjax) {
        return;
    }

    $elapsed = number_format(microtime(true) - Gravel::$startTime, 5);
    echo PHP_EOL . "<!-- Generated in " . $elapsed . " seconds -->";
    echo PHP_EOL . "<!-- Gravel PHP framework " . Gravel::$version . " -->";
}
/**
 * Downloads a page, converts it to XHTML with Tidy and loads it into
 * $this->document, with an XPath helper in $this->xpath.
 *
 * URIs that do not start with http: or https: are prefixed with the
 * configured base href. The cURL transfer info is kept in
 * $this->request_info and the final URI in $this->location.
 *
 * @param string $uri Absolute URL, or a path relative to the base href
 */
protected function loadHtml($uri)
{
    $isAbsolute = preg_match('/^https?:/i', $uri) !== 0;
    if (!$isAbsolute) {
        $uri = $this->config->getBaseHref() . $uri;
    }

    $handle = curl_init($uri);
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
    $body = curl_exec($handle);
    $this->request_info = curl_getinfo($handle);
    curl_close($handle);

    $this->location = $uri;

    $tidyOptions = array(
        'output-xhtml' => true,
        'char-encoding' => 'utf8',
        'numeric-entities' => true,
    );
    $tidy = new Tidy();
    $tidy->parseString($body, $tidyOptions, 'utf8');
    $tidy->cleanRepair();

    $document = new DOMDocument();
    $document->resolveExternals = true;
    $document->loadXml($tidy);
    $this->document = $document;

    $xpath = new DOMXPath($document);
    $xpath->registerNamespace('atom', 'http://www.w3.org/2005/Atom');
    $xpath->registerNamespace('html', 'http://www.w3.org/1999/xhtml');
    $this->xpath = $xpath;
}
/**
 * Parses an XML string with Tidy (indented, wrapped at 200 columns) and
 * returns the Tidy document object itself, not a string.
 *
 * @param string $in
 * @return \Tidy
 */
private static function tidyit($in)
{
    $options = [
        'indent' => true,
        'input-xml' => true,
        'wrap' => 200,
    ];

    $document = new \Tidy();
    $document->parseString($in, $options, 'utf8');

    return $document;
}
$response['code'] = "N/A"; $response['message'] = ""; //use php's filter to check for a valid url if (!filter_var($_POST['url'], FILTER_VALIDATE_URL) === false) { $url = $_POST['url']; $curl = new MyCurl($url); $curl->createCurl(); $response['code'] = $curl->getHttpStatus(); $response['message'] = HttpCodes::getType($response['code']); $html = $curl->__toString(); if (!is_string($html)) { $response['message'] = "Page Could not be loaded, check the domain. Nothing was returned."; } else { $tidy = new Tidy(); //load page into tidy object, set options, and clean html $tidy->parseString($html, array('indent' => 2, 'output-xhtml' => true)); $tidy->cleanRepair(); //html is now nicely indented $html = (string) $tidy; //count the tags and get the result in a $tag => $count array $tagCount = countTags($html); $response['tagCount'] = $tagCount; $response['html'] = htmlentities($html); } } else { $response['message'] = $_POST['url'] . " is not a valid URL"; } header('Content-Type: application/json'); echo json_encode($response); } else { //load the base view, located at ../views/base.php
/**
 * Serialises a list of tokens into an HTML string.
 *
 * The generator state (_scriptFix, _def, _xhtml) is refreshed from the
 * configuration on every call, even when there are no tokens.
 *
 * @param $tokens Array of HTMLPurifier_Token
 * @param $config HTMLPurifier_Config object; a default one is created when falsy
 * @param $context Not used by this method
 * @return Generated HTML ('' when no tokens are given)
 */
function generateFromTokens($tokens, $config, &$context)
{
    $html = '';
    if (!$config) {
        $config = HTMLPurifier_Config::createDefault();
    }
    $this->_scriptFix = $config->get('Output', 'CommentScriptContents');
    $this->_def = $config->getHTMLDefinition();
    $this->_xhtml = $this->_def->doctype->xml;

    if (!$tokens) {
        return '';
    }

    $total = count($tokens);
    $pos = 0;
    while ($pos < $total) {
        // Script special case: start tag, ONE content token, end tag.
        // Longer script bodies are deliberately not handled.
        $isScriptTriple = $this->_scriptFix
            && $tokens[$pos]->name === 'script'
            && $pos + 2 < $total
            && $tokens[$pos + 2]->type == 'end';

        if ($isScriptTriple) {
            $html .= $this->generateFromToken($tokens[$pos]);
            $html .= $this->generateScriptFromToken($tokens[$pos + 1]);
            $pos += 2;
        }

        $html .= $this->generateFromToken($tokens[$pos]);
        $pos++;
    }

    if ($config->get('Output', 'TidyFormat') && extension_loaded('tidy')) {
        $tidy_options = array(
            'indent' => true,
            'output-xhtml' => $this->_xhtml,
            'show-body-only' => true,
            'indent-spaces' => 2,
            'wrap' => 68,
        );

        if (!version_compare(PHP_VERSION, '5', '<')) {
            // object API
            $tidy = new Tidy();
            $tidy->parseString($html, $tidy_options, 'utf8');
            $tidy->cleanRepair();
            $html = (string) $tidy;
        } else {
            // procedural API used before PHP 5
            tidy_set_encoding('utf8');
            foreach ($tidy_options as $key => $value) {
                tidy_setopt($key, $value);
            }
            tidy_parse_string($html);
            tidy_clean_repair();
            $html = tidy_get_output();
        }
    }

    // normalize newlines to the configured value, or the system's
    $nl = $config->get('Output', 'Newline');
    if ($nl === null) {
        $nl = PHP_EOL;
    }

    return str_replace("\n", $nl, $html);
}
/**
 * Converts HTML tag soup into XHTML that an XML parser can load.
 *
 * @param string $htmlTagSoup Arbitrary HTML
 * @return string The tidied XHTML, with every &nbsp; entity rewritten as
 *                the numeric reference &#160;
 */
function tidyToXml($htmlTagSoup)
{
    // Create the Tidy object
    $tidy = new Tidy();

    // Parse the HTML into memory, turning on the option to convert to
    // XHTML as part of the tidying process
    $tidy->parseString($htmlTagSoup, array('output-xhtml' => true));

    // Do the tidying
    $tidy->cleanRepair();

    // And get the tidied version as a string
    $tidied_xml = tidy_get_output($tidy);

    // Opinions seem to differ as to whether the non-breaking space
    // entity '&nbsp;' is predeclared as part of XHTML. Tidy thinks it
    // is, and so leaves it alone, while the XML parser we're about to
    // use on this string thinks otherwise. So replace any occurrences
    // of it with its numeric equivalent (which doesn't need to be
    // declared). The search string must be the entity itself: a
    // whitespace-for-whitespace replacement would leave '&nbsp;' in place.
    return str_replace('&nbsp;', '&#160;', $tidied_xml);
}
/**
 * The filter must produce exactly what Tidy produces when called directly
 * with the filter's own configuration and encoding.
 *
 * @dataProvider filterProvider
 * @covers Robo47_Filter_Tidy::filter
 */
public function testFilter($code)
{
    $filter = new Robo47_Filter_Tidy();
    $actual = $filter->filter($code);

    // reference result straight from Tidy
    $reference = new Tidy();
    $reference->parseString($code, $filter->getConfig(), $filter->getEncoding());
    $reference->cleanRepair();
    $expected = (string) $reference;

    $this->assertEquals($expected, $actual, 'Filter output missmatches direct tidy-output');
}
/**
 * Cleans an HTML fragment with Tidy and returns the repaired body markup.
 *
 * Empty input short-circuits to ''. The options drop the doctype, empty
 * paragraphs, font tags, proprietary attributes and comments, and only the
 * body content is emitted.
 *
 * @param string $content HTML fragment
 * @return string The cleaned fragment
 */
private function clean($content)
{
    if (!$content) {
        return '';
    }

    $tidy = new \Tidy();
    $tidy->parseString($content, [
        'indent' => true,
        'doctype' => 'omit',
        'output-html' => true,
        'show-body-only' => true,
        'drop-empty-paras' => true,
        'drop-font-tags' => true,
        'drop-proprietary-attributes' => true,
        'hide-comments' => true,
        'logical-emphasis' => true,
    ]);
    $tidy->cleanRepair();

    // Return the repaired document. Returning $content here would hand
    // back the untouched input and throw the Tidy pass away.
    return (string) $tidy;
}
<?php
// Fetches a page while presenting the Googlebot user agent, repairs it as
// XML with Tidy, and collects title/link pairs from every XHTML <h2>.
$start = microtime(true);

$ch = curl_init();
curl_setopt_array($ch, [
    CURLOPT_URL => 'http://thinkphp.com.ua/',
    // we want to pretend to be the Googlebot
    CURLOPT_USERAGENT => 'Googlebot/2.1 (+http://www.google.com/bot.html)',
    CURLOPT_RETURNTRANSFER => true,
]);
$content = curl_exec($ch);

// fix the document, as it's xml
$tidy = new Tidy();
$tidy->parseString(
    $content,
    ['input-xml' => true, 'output-xml' => true, 'wrap' => false],
    'utf8'
);
$tidy->cleanRepair();
$content = (string) $tidy;

// load the string as a simplexml object
$xml = simplexml_load_string($content);
// register the namespace so the XPath query can address XHTML elements
$xml->registerXPathNamespace('xmlns', 'http://www.w3.org/1999/xhtml');

$eventsHeaders = [];
foreach ($xml->xpath('//xmlns:h2') as $node) {
    // remove the span child if present
    unset($node->span);

    // take the link target when the heading has one
    $link = isset($node->a['href']) ? (string) $node->a['href'] : null;

    $eventsHeaders[] = [
        'title' => trim(strip_tags($node->asXml())),
        'link' => $link,
    ];
}

$finish = microtime(true);
/**
 * Output-buffer callback: optionally prettifies the page with Tidy,
 * rewrites script URLs to carry the session id (or the debug flag), runs
 * the SimpleSEF rewrite when enabled, and fills in the load-time
 * placeholder.
 *
 * @param string $buffer The buffered page output
 * @return mixed The rewritten output
 */
function ob_sessrewrite($buffer)
{
    global $scripturl, $modSettings, $context, $user_info, $txt, $time_start, $db_count;

    /*
     * Tidy is a debugging aid only: it runs for admins when 'tidyup' is in
     * the request and the request is not an xml one (tidy can be slow).
     */
    $wantsTidy = isset($_REQUEST['tidyup'])
        && !isset($_REQUEST['xml'])
        && class_exists('Tidy')
        && $user_info['is_admin'];
    if ($wantsTidy) {
        $tidy = new Tidy();
        $tidy->parseString(
            $buffer,
            array(
                'indent' => true,
                'output-html' => true,
                'wrap' => 0,
                'merge-divs' => false,
                'merge-spans' => false,
            ),
            'utf8'
        );
        $buffer = $tidy;
    }

    // If $scripturl is set to nothing, or the SID is not defined (SSI?) just quit.
    if ($scripturl == '' || !defined('SID')) {
        return $buffer;
    }

    $quotedScriptUrl = preg_quote($scripturl, '/');

    if (empty($_COOKIE) && SID != '' && empty($context['browser']['possibly_robot'])) {
        // Append the session id, but only if the session isn't cookied and
        // the client is NOT a spider.
        $pattern = '/"' . $quotedScriptUrl . '(?!\\?' . preg_quote(SID, '/') . ')\\??/';
        $buffer = preg_replace($pattern, '"' . $scripturl . '?' . SID . '&', $buffer);
    } elseif (isset($_GET['debug'])) {
        $pattern = '/(?<!<link rel="canonical" href=)"' . $quotedScriptUrl . '\\??/';
        $buffer = preg_replace($pattern, '"' . $scripturl . '?debug;', $buffer);
    }

    // microtime() strings are "usec sec"; summing both parts gives seconds.
    $now = microtime();
    $nowSeconds = array_sum(explode(' ', $now));
    $context['load_time'] = round($nowSeconds - array_sum(explode(' ', $time_start)), 3);
    $context['load_queries'] = $db_count;
    $context['template_benchmark_time'] = round($nowSeconds - array_sum(explode(' ', $context['template_benchmark'])), 3);

    if (!empty($modSettings['simplesef_enable'])) {
        if (isset($context['sef_full_rewrite'])) {
            $buffer = SimpleSEF::ob_simplesef($buffer);
        } else {
            $buffer = SimpleSEF::ob_simplesef_light($buffer);
        }
    }

    $templateSuffix = EoS_Smarty::isActive() ? 's template-smarty), ' : 's template), ';

    // Only admins get the timing details; everyone else gets an empty string.
    if ($user_info['is_admin']) {
        $loadInfo = $context['load_time'] . 's CPU (' . $context['template_benchmark_time'] . $templateSuffix . $context['load_queries'] . ' ' . $txt['queries'] . SimpleSEF::getPerfData();
    } else {
        $loadInfo = '';
    }
    $buffer = str_replace('@%%__loadtime__%%@', $loadInfo, $buffer);

    if (isset($_REQUEST['xml'])) {
        $buffer = ltrim($buffer);
    }

    return $buffer;
}
/**
 * Renders the template to a string.
 *
 * Registers the jQuery and bootstrap include snippets and the elapsed
 * time as template values, expands $this->str through parseVal(), and
 * runs the result through Tidy when Config\Main::$tidyEnabled is set.
 *
 * NOTE(review): microtime() is called without `true`, so it returns a
 * "usec sec" string; confirm how $this->time is initialised before
 * relying on the computed duration. $this->time is also overwritten here.
 *
 * @return string
 */
function __toString()
{
    $this->set('jQuery', '<script src="http://ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js"></script>');
    $this->set('bootstrap', '<link href="http://getbootstrap.com/dist/css/bootstrap.min.css" rel="stylesheet"> <script src="http://getbootstrap.com/dist/js/bootstrap.min.js"></script>');

    $this->time = round(microtime() - $this->time, 4);
    $this->set("time", $this->time);

    $rendered = $this->parseVal($this->str);

    if (!Config\Main::$tidyEnabled) {
        return (string) $rendered;
    }

    $tidy = new \Tidy();
    $tidy->parseString($rendered, ["wrap" => 160]);
    $tidy->cleanRepair();

    return (string) $tidy;
}
/**
	Clean and repair HTML using the TIDY options from the framework
	variables; the input is returned untouched when the tidy extension
	is not loaded
		@return string
		@param $html string
		@public
**/
static function tidy($html)
{
    if (extension_loaded('tidy')) {
        // the framework encoding is passed with its hyphens removed
        $encoding = str_replace('-', '', self::$vars['ENCODING']);

        $doc = new Tidy();
        $doc->parseString($html, self::$vars['TIDY'], $encoding);
        $doc->cleanRepair();
        $html = (string) $doc;
    }

    return $html;
}
// Command-line wrapper around the PHP tidy extension: reads markup from
// STDIN, applies the options given on the command line, writes the result
// to STDOUT and Tidy's diagnostics to STDERR.
error_reporting(E_ALL);

//////////////// VARIABLES ///////////////////
// some, but not all options for tidy
// found on tidy.sourceforge.net/docs/quickref.html
// and tidy.sourceforge.net/docs/tidy_man.html
// '::' marks all as optional
$short_options = 'o::' . 'f::' . 'm::' . 'i::' . 'w::' . 'u::' . 'c::' . 'b::' . 'n::' . 'e::' . 'q::' . 'v::' . 'h::';
$long_options = array(
    'add-xml-decl::', 'add-xml-space::', 'alt-text::', 'anchor-as-name::',
    'assume-xml-procins::', 'bare::', 'clean::', 'css-prefix::',
    'decorate-inferred-ul::',
    // The option is named "doctype"; a getopt() name containing spaces can
    // never be given as --doctype=... on the command line.
    'doctype::',
    'drop-empty-paras::', 'drop-font-tags::', 'drop-proprietary-attributes::',
    'enclose-block-text::', 'enclose-text::', 'escape-cdata::',
    'fix-backslash::', 'fix-bad-comments::', 'fix-uri::', 'hide-comments::',
    'hide-endtags::', 'indent-cdata::', 'input-xml::', 'join-classes::',
    'join-styles::', 'literal-attributes::', 'logical-emphasis::',
    'lower-literals::', 'merge-divs::', 'merge-spans::', 'ncr::',
    'new-blocklevel-tags::', 'new-empty-tags::', 'new-inline-tags::',
    'new-pre-tags::', 'numeric-entities::', 'output-html::', 'output-xhtml::',
    'output-xml::', 'preserve-entities::', 'quote-ampersand::',
    'quote-marks::', 'quote-nbsp::', 'repeated-attributes::',
    'replace-color::', 'show-body-only::', 'uppercase-attributes::',
    'uppercase-tags::', 'word-2000::', 'break-before-br::', 'indent::',
    'indent-attributes::', 'indent-spaces::', 'markup::',
    'punctuation-wrap::', 'sort-attributes::', 'split::', 'tab-size::',
    'vertical-space::', 'wrap::', 'wrap-asp::', 'wrap-attributes::',
    'wrap-jste::', 'wrap-php::', 'wrap-script-literals::', 'wrap-sections::',
    'ascii-chars::', 'char-encoding::', 'input-encoding::', 'language::',
    'newline::', 'output-bom::', 'output-encoding::', 'error-file::',
    'force-output::', 'gnu-emacs::', 'gnu-emacs-file::', 'keep-time::',
    'output-file::', 'tidy-mark::', 'write-back::',
);

///////////// PROCEDURES ////////////////
if (!version_compare(phpversion(), "5.2", ">=")) {
    fwrite(STDERR, "Error: tidy.php \nrequires PHP 5.2 or newer.\n");
    exit(1);
}
if (!class_exists('Tidyx') && !class_exists('Tidy')) {
    fwrite(STDERR, "Error: tidy.php requires PHP 5.2 with libtidy support built in.\n");
    exit(1);
}

// Parse arguments using the lists above.
$arguments = getopt($short_options, $long_options);
if (!is_array($arguments)) {
    $arguments = array();
}
// getopt() reports an option given WITHOUT a value as false. Passing that
// on would switch the Tidy option off, the opposite of what a bare flag
// such as --indent asks for, so a bare flag is translated to true.
foreach ($arguments as $option => $value) {
    if ($value === false) {
        $arguments[$option] = true;
    }
}
// NOTE(review): the short options are forwarded to Tidy under their
// one-letter names, which do not appear to be Tidy configuration keys -
// confirm whether they should be mapped or dropped.

$input = stream_get_contents(STDIN);
try {
    $tidy = new Tidy();
    $tidy->parseString($input, $arguments, 'utf8');
} catch (Exception $e) {
    fwrite(STDERR, "Error: PHP doesn't have libtidy installed.\n");
    exit(1);
}

fwrite(STDOUT, (string) $tidy);
if ($tidy->errorBuffer) {
    fwrite(STDERR, $tidy->errorBuffer);
}
/**
 * Serialises a list of tokens into an HTML string, optionally formatted
 * by Tidy and with newlines normalised when Core.NormalizeNewlines is set.
 *
 * @param $tokens Array of HTMLPurifier_Token
 * @return Generated HTML ('' when no tokens are given)
 */
public function generateFromTokens($tokens)
{
    if (!$tokens) {
        return '';
    }

    $html = '';
    $total = count($tokens);
    $pos = 0;
    while ($pos < $total) {
        // Script special case: start tag, one content token, end tag.
        $isScriptTriple = $this->_scriptFix
            && $tokens[$pos]->name === 'script'
            && $pos + 2 < $total
            && $tokens[$pos + 2] instanceof HTMLPurifier_Token_End;

        if ($isScriptTriple) {
            $html .= $this->generateFromToken($tokens[$pos]);
            $html .= $this->generateScriptFromToken($tokens[$pos + 1]);
            $pos += 2;
        }

        $html .= $this->generateFromToken($tokens[$pos]);
        $pos++;
    }

    if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) {
        $formatter = new Tidy();
        $formatter->parseString(
            $html,
            array(
                'indent' => true,
                'output-xhtml' => $this->_xhtml,
                'show-body-only' => true,
                'indent-spaces' => 2,
                'wrap' => 68,
            ),
            'utf8'
        );
        $formatter->cleanRepair();
        $html = (string) $formatter;
    }

    if (!$this->config->get('Core.NormalizeNewlines')) {
        return $html;
    }

    // Convert "\n" to the configured newline, defaulting to the platform's
    $newline = $this->config->get('Output.Newline');
    $newline = ($newline === null) ? PHP_EOL : $newline;
    if ($newline !== "\n") {
        $html = str_replace("\n", $newline, $html);
    }

    return $html;
}
/**
 * Custom function that formats a string of HTML using Tidy.
 *
 * Formatting only happens when the Tidy class exists and the 'unl_tidy'
 * variable is truthy; otherwise the input is returned unchanged.
 *
 * @param string $string HTML fragment
 * @return string The tidied fragment, or the (possibly prefixed) input
 *                when Tidy is unavailable, disabled or fails
 */
function unl_tidy($string)
{
    if (!class_exists('Tidy') || !variable_get('unl_tidy')) {
        return $string;
    }

    $tidy = new Tidy();

    // Tidy options: http://tidy.sourceforge.net/docs/quickref.html
    $options = array(
        'doctype' => 'omit',
        'new-blocklevel-tags' => 'article,aside,header,footer,section,nav,hgroup,address,figure,figcaption,output',
        'new-inline-tags' => 'video,audio,canvas,ruby,rt,rp,time,code,kbd,samp,var,mark,bdi,bdo,wbr,details,datalist,source,summary',
        'output-xhtml' => true,
        'show-body-only' => true,
        'indent' => true,
        'indent-spaces' => 2,
        'vertical-space' => false,
        'wrap' => 140,
        'wrap-attributes' => false,
        'force-output' => true,
        'quiet' => true,
        'tidy-mark' => false,
    );

    // Add a prefix to prevent Tidy from removing a script or comment when
    // it is the first thing in the field.
    $trimmed = trim($string);
    if (strtolower(substr($trimmed, 0, 7)) == '<script' || substr($trimmed, 0, 4) == '<!--') {
        $statement = '';
        if (substr($trimmed, 0, 9) !== '<!-- Tidy') {
            $statement = "<!-- Tidy: Start field with something other than script or comment to remove this -->\n";
        }
        // The prefix must be real content: plain whitespace is not
        // significant to Tidy, so a non-breaking-space entity is used.
        $string = '&nbsp;' . $statement . $string;
    }

    $tidy->parseString($string, $options, 'utf8');
    if ($tidy->cleanRepair()) {
        // Cast so that every path returns a string, as documented.
        return (string) $tidy;
    }

    return $string;
}
/**
 * Parses the markup with Tidy as XHTML and replaces $source, in place,
 * with the serialised <body> node of the parsed document.
 *
 * @param string $source Markup; overwritten with the result
 */
protected function cleanup(&$source)
{
    $options = array(
        'drop-font-tags' => true,
        'drop-proprietary-attributes' => true,
        'hide-comments' => true,
        'numeric-entities' => true,
        'output-xhtml' => true,
        'wrap' => 0,
        'bare' => true,
        'word-2000' => true,
        // additional element names Tidy should accept
        'new-blocklevel-tags' => 'section nav article aside hgroup header footer figure figcaption ruby video audio canvas details datagrid summary menu',
        'new-inline-tags' => 'time mark rt rp output progress meter',
        'new-empty-tags' => 'wbr source keygen command',
    );

    $parser = new Tidy();
    $parser->parseString($source, $options, 'utf8');

    $bodyNode = $parser->body();
    $source = $bodyNode->value;
}
/**
 * Indents HTML with Tidy when the class is available, then collapses a
 * line break that sits between a tag end and a closing tag. Without Tidy
 * the input is returned unchanged.
 *
 * @param string $html
 * @return string
 */
public function htmlIndentation($html)
{
    if (!class_exists('tidy')) {
        return $html;
    }

    $options = array(
        'char-encoding' => 'utf8',
        'vertical-space' => false,
        'indent' => true,
        'wrap' => 0,
        'word-2000' => 1,
        'break-before-br' => true,
        'indent-cdata' => true,
    );

    $formatter = new \Tidy();
    $formatter->parseString($html, $options);
    $indented = tidy_get_output($formatter);

    // ">\n</" becomes "></"
    return str_replace('>' . PHP_EOL . '</', '></', $indented);
}