Exemplo n.º 1
0
 /**
  * Gathers the advertisement links from a remote page.
  *
  * Requests $post['url'] through the client held in $this->_client, runs the
  * body through Tidy when that class is available, parses the result as HTML
  * and collects every anchor whose href is an http:// URL on the site named
  * by $post['ad'] (optionally below a single sub-domain label).
  *
  * @param post		Array				The post data submitted by the form; the keys 'url' and 'ad' are read.
  * @return			Array				The href values of the matching anchors, in document order.
  */
 public function fetchAds($post)
 {
     $this->_client->setUri($post['url']);
     $response = $this->_client->request('GET')->getBody();
     // If the tidy class exists, attempt to clean up the markup returned by
     // the remote site before handing it to the DOM parser.
     if (class_exists('tidy')) {
         $tidy = new Tidy('/dev/null', array('indent' => true, 'tab-size' => 4, 'output-encoding' => 'utf8', 'newline' => 'LF', 'output-xhtml' => true), 'utf8');
         $tidy->parseString($response);
         $tidy->cleanRepair();
         $response = $tidy->value;
     }
     // Parse the (possibly cleaned) markup into a DOM tree.
     $xml = new DOMDocument('1.0', 'utf-8');
     $xml->loadHTML($response);
     // The advertiser name comes from the form: quote it so that characters
     // such as "." are matched literally and cannot alter or break the pattern.
     $ad = preg_quote($post['ad'], '/');
     $pattern = "/^http:\\/\\/([a-z\\-]+\\.)?{$ad}.*\$/i";
     $result = array();
     foreach ($xml->getElementsByTagName('a') as $a) {
         $href = $a->getAttribute('href');
         // Keep the link when its target points at the advertising site.
         if (preg_match($pattern, $href)) {
             $result[] = $href;
         }
     }
     return $result;
 }
Exemplo n.º 2
0
 /**
  * Runs the stylesheet over an XML document and returns XHTML-compatible markup.
  * @param $xml XML DOM tree, or string filename to load the tree from
  * @return string HTML output
  * @todo Rename to transformToXHTML, as transformToHTML is misleading
  */
 public function transformToHTML($xml)
 {
     $document = $xml;
     if (is_string($xml)) {
         // A string is treated as a filename
         $document = new DOMDocument();
         $document->load($xml);
     }
     $markup = $this->xsltProcessor->transformToXML($document);
     // Fudges for HTML backwards compatibility (the document is assumed to
     // be XHTML): "<br/>" becomes "<br />", then empty xmlns attributes go.
     $markup = str_replace(array('/>', ' xmlns=""'), array(' />', ''), $markup);
     if (!class_exists('Tidy')) {
         return $markup;
     }
     // Tidy is available: clean up the output
     $tidy = new Tidy();
     $tidy->parseString($markup, array('indent' => true, 'output-xhtml' => true, 'wrap' => 80), 'utf8');
     $tidy->cleanRepair();
     return (string) $tidy;
 }
Exemplo n.º 3
0
 /**
  * Serialises an array of tokens into an HTML string.
  * @param $tokens Array of HTMLPurifier_Token
  * @return Generated HTML; an empty string when $tokens is empty
  */
 public function generateFromTokens($tokens)
 {
     if (!$tokens) {
         return '';
     }
     $html = '';
     $size = count($tokens);
     $i = 0;
     while ($i < $size) {
         // Script special case: start tag, ONE content token, end tag.
         $isScriptBlock = $this->_scriptFix
             && $tokens[$i]->name === 'script'
             && $i + 2 < $size
             && $tokens[$i + 2] instanceof HTMLPurifier_Token_End;
         if ($isScriptBlock) {
             $html .= $this->generateFromToken($tokens[$i]);
             $html .= $this->generateScriptFromToken($tokens[$i + 1]);
             $i += 2;
         }
         $html .= $this->generateFromToken($tokens[$i]);
         $i++;
     }
     // Optional Tidy pretty-printing
     if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) {
         $tidyOptions = array('indent' => true, 'output-xhtml' => $this->_xhtml, 'show-body-only' => true, 'indent-spaces' => 2, 'wrap' => 68);
         $tidy = new Tidy();
         $tidy->parseString($html, $tidyOptions, 'utf8');
         $tidy->cleanRepair();
         // the explicit cast is what turns the Tidy object into markup
         $html = (string) $tidy;
     }
     // Convert newlines to the configured value, or the system default
     $nl = $this->config->get('Output.Newline');
     if ($nl === null) {
         $nl = PHP_EOL;
     }
     return $nl === "\n" ? $html : str_replace("\n", $nl, $html);
 }
Exemplo n.º 4
0
 /**
  * Builds the parser from a chunk of markup.
  *
  * The content is converted to XHTML with the tidy extension when it is
  * loaded, otherwise with the tidy command line tool when `which tidy`
  * finds one, and is used untouched when neither is available. The result
  * is loaded into $this->simpleXML.
  *
  * @param $content Markup to parse
  * @throws Exception when the markup cannot be loaded as XML
  */
 public function __construct($content)
 {
     // Removed from tidied output: the XHTML namespace declaration and
     // numeric non-breaking spaces.
     $unwanted = array('xmlns="http://www.w3.org/1999/xhtml"', '&#160;');
     if (extension_loaded('tidy')) {
         // use the tidy PHP extension
         $document = new Tidy();
         $document->parseString($content, array('output-xhtml' => true, 'numeric-entities' => true, 'wrap' => 99999), 'utf8');
         $document->cleanRepair();
         $markup = str_replace($unwanted, '', $document);
     } elseif (@shell_exec('which tidy')) {
         // use tidy through the command line
         $CLI_content = escapeshellarg($content);
         $markup = shell_exec("echo {$CLI_content} | tidy -n -q -utf8 -asxhtml 2> /dev/null");
         $markup = str_replace($unwanted, '', $markup);
     } else {
         // no tidy library found, hence no sanitizing
         $markup = $content;
     }
     $this->simpleXML = @simplexml_load_string($markup, 'SimpleXMLElement', LIBXML_NOWARNING);
     if (!$this->simpleXML) {
         throw new Exception('CSSContentParser::__construct(): Could not parse content.' . ' Please check the PHP extension tidy is installed.');
     }
     parent::__construct();
 }
Exemplo n.º 5
0
	/**
	 * Builds the parser from a chunk of markup.
	 *
	 * Uses the tidy extension when loaded, else the tidy command line tool
	 * when `which tidy` finds one, else the content as given; the result is
	 * wrapped in a SimpleXMLElement stored in $this->simpleXML.
	 *
	 * @param $content Markup to parse
	 */
	function __construct($content) {
		// Removed from tidied output: the XHTML namespace declaration and
		// numeric non-breaking spaces.
		$unwanted = array('xmlns="http://www.w3.org/1999/xhtml"', '&#160;');

		if(extension_loaded('tidy')) {
			// use the tidy PHP extension
			$options = array(
				'output-xhtml' => true,
				'numeric-entities' => true,
			);
			$document = new Tidy();
			$document->parseString($content, $options, 'utf8');
			$document->cleanRepair();
			$markup = str_replace($unwanted, '', $document);
		} elseif(shell_exec('which tidy')) {
			// use tidy through the command line
			$CLI_content = escapeshellarg($content);
			$markup = shell_exec("echo $CLI_content | tidy -n -q -utf8 -asxhtml 2> /dev/null");
			$markup = str_replace($unwanted, '', $markup);
		} else {
			// no tidy library found, hence no sanitizing
			$markup = $content;
		}

		$this->simpleXML = new SimpleXMLElement($markup);
	}
Exemplo n.º 6
0
 /**
  * Parses the response content with Tidy and fails when Tidy's error
  * buffer is not empty.
  *
  * @see ExtensionInterface
  * @throws \Exception carrying the content of Tidy's error buffer
  */
 public function apply(Response $response)
 {
     $checker = new \Tidy();
     $checker->parseString($response->getContent());
     $report = $checker->errorBuffer;
     if (!$report) {
         return;
     }
     throw new \Exception($report);
 }
Exemplo n.º 7
0
 /**
  * Repairs the given markup with Tidy, treating both input and output as
  * XML and disabling line wrapping, and returns the repaired text.
  *
  * @param string $content
  * @return string
  */
 protected function _tidyFix($content)
 {
     $fixer = new Tidy();
     $fixer->parseString($content, ['input-xml' => true, 'output-xml' => true, 'wrap' => false], 'utf8');
     $fixer->cleanRepair();
     return (string) $fixer;
 }
Exemplo n.º 8
0
 /**
  * Pretty-prints the response with Tidy when the request format is 'html';
  * any other format is passed through unchanged.
  *
  * @param $request  Array-like request data; the '_format' entry is read
  * @param $response Response markup
  * @return The value of the tidied <html> node, or $response as given
  */
 public static function beforeResponse($request, $response)
 {
     if ($request['_format'] != 'html') {
         return $response;
     }
     $options = array('wrap' => 200, 'indent' => true);
     $tidy = new \Tidy();
     $tidy->parseString($response, $options, 'utf8');
     $tidy->cleanRepair();
     $root = $tidy->html();
     return $root->value;
 }
Exemplo n.º 9
0
 /**
  * Converts the markup to UTF-8, repairs it with Tidy (dropping comments)
  * and post-processes the result with moveMetaContentTypeToTop() and
  * formatDocType().
  *
  * @param $html         Markup to format
  * @param $charset      Passed through to toUTF8()
  * @param $charset_hint Passed through to toUTF8()
  * @return The formatted markup
  */
 public function formatHtml($html, $charset = null, $charset_hint = null)
 {
     $utf8Markup = $this->toUTF8($html, $charset, $charset_hint);
     $tidy = new Tidy();
     $tidy->parseString($utf8Markup, array("hide-comments" => true), 'UTF8');
     $tidy->cleanRepair();
     $repaired = (string) $tidy;
     return $this->formatDocType($this->moveMetaContentTypeToTop($repaired));
 }
Exemplo n.º 10
0
 /**
  * Tidies the given HTML and loads the result into a DOM document.
  *
  * Stores the raw input in $this->html_code, the tidied markup in
  * $this->tidy_code and the normalized document in $this->dom. When the
  * tidied markup cannot be parsed as XML, $this->dom is set to null and an
  * E_USER_ERROR is raised.
  *
  * @param $html_code HTML source to parse
  */
 function parse_html($html_code)
 {
     $this->html_code = $html_code;
     // Tidy HTML code using the configuration held in $this->tidy_config
     $tidy = new Tidy();
     $tidy->parseString($html_code, $this->tidy_config, 'utf8');
     $tidy->cleanRepair();
     $this->tidy_code = $tidy->value;
     // loadXML() is an instance method: calling it statically is an error on
     // current PHP versions, so build the document explicitly.
     $dom = new DOMDocument();
     if (!$dom->loadXML($this->tidy_code)) {
         // Report the failure before touching the document; the original
         // order called normalizeDocument() on the failed result first.
         $this->dom = null;
         trigger_error("Unable to parse XML Document!", E_USER_ERROR);
         return;
     }
     $dom->normalizeDocument();
     $this->dom = $dom;
 }
Exemplo n.º 11
0
 /**
  * Rewrites every table wrapped in a <div class="rvps14"> or
  * <div class="rvps8">: the wrapper and the table attributes are dropped,
  * the styled header spans become <b class="table-header">, and the table
  * is re-indented with Tidy.
  *
  * @param $text Markup to process
  * @return The markup with the matching tables replaced
  */
 public function formatTables($text)
 {
     $tidyOptions = array('clean' => true, 'output-html' => true, 'show-body-only' => true, 'wrap' => 0, 'indent' => true);
     $rebuildTable = function ($matches) use ($tidyOptions) {
         $table = '<table>' . $matches[1] . '</table>';
         // Header cells: rvts9/15/23 spans, optionally inside rvps1/4/14 paragraphs
         // (pairs noted by the original author: rvps14 - rvps14, rvps14 - rvps11, rvps4 - rvps15)
         $table = preg_replace('%(?:<p class="rvps(?:1|4|14)">)?<span class="rvts(?:9|15|23)">\\s*(.*?)\\s*</span>(?:</p>)?%u', '<b class="table-header">$1</b>', $table);
         // Drop headers that only contained a line break
         $table = preg_replace('%<b class="table-header"><br></b>%u', '', $table);
         $tidy = new \Tidy();
         $tidy->parseString($table, $tidyOptions, 'utf8');
         $tidy->cleanRepair();
         return $tidy . "\n";
     };
     return preg_replace_callback('%<div class="rvps(?:14|8)">\\n*<table.*?>([\\s\\S]*?)</table>\\n*</div>%u', $rebuildTable, $text);
 }
Exemplo n.º 12
0
 /**
  * Reads from the wrapped input and returns the Tidy-filtered result.
  *
  * @param null $len Passed through to the wrapped reader
  *
  * @throws BuildException when the Tidy class is not available
  * @return the Tidy output for the data read, or -1 if the wrapped reader returned -1
  *
  */
 public function read($len = null)
 {
     if (!class_exists('Tidy')) {
         throw new BuildException("You must enable the 'tidy' extension in your PHP configuration in order to use the Tidy filter.");
     }
     // Lazy one-time initialisation
     if (!$this->getInitialized()) {
         $this->_initialize();
         $this->setInitialized(true);
     }
     $chunk = $this->in->read($len);
     if ($chunk !== -1) {
         $settings = $this->getDistilledConfig();
         $filter = new Tidy();
         $filter->parseString($chunk, $settings, $this->encoding);
         $filter->cleanRepair();
         return tidy_get_output($filter);
     }
     return -1;
 }
Exemplo n.º 13
0
 /**
  * Applies the stylesheet to an XML document and returns HTML-friendly markup.
  * @param $xml XML DOM tree
  * @return The transformed markup, tidied when the Tidy class is available
  */
 public function transformToHTML($xml)
 {
     // Fudges for HTML backwards compatibility, applied in this order:
     //   "/>"  -> " />"   (<br /> not <br/>)
     //   empty xmlns attribute removed
     //   XHTML xmlns attribute removed
     $search = array('/>', ' xmlns=""', ' xmlns="http://www.w3.org/1999/xhtml"');
     $replace = array(' />', '', '');
     $markup = str_replace($search, $replace, $this->xsltProcessor->transformToXML($xml));
     if (!class_exists('Tidy')) {
         return $markup;
     }
     // Tidy is available: clean up the output
     $tidy = new Tidy();
     $tidy->parseString($markup, array('indent' => true, 'output-xhtml' => true, 'wrap' => 80), 'utf8');
     $tidy->cleanRepair();
     return (string) $tidy;
 }
Exemplo n.º 14
0
 /**
  * Compiles the template and echoes the result, run through Tidy when the
  * 'tidy_html' setting is on and the Tidy class exists. For non-AJAX
  * requests two HTML comments with timing and version info are appended.
  */
 public function generateResponse()
 {
     TemplateEngine::compile();
     $output = TemplateEngine::$data['compiled'];
     if (Gravel::$config['gravel']['tidy_html'] && class_exists('Tidy')) {
         $tidyOptions = ['indent' => 1, 'indent-spaces' => 4, 'output-xhtml' => 'false', 'wrap' => 0, 'hide-comments' => 0];
         $tidy = new \Tidy();
         $tidy->parseString($output, $tidyOptions);
         // echoed below, which converts the Tidy object to its markup
         $output = $tidy;
     }
     if (Gravel::$config['gravel']['debug_mode']) {
         header("Content-Type: text/plain");
     }
     echo $output;
     // only non-AJAX requests get the debug info appended
     $isAjax = isset($_SERVER['HTTP_X_REQUESTED_WITH']) && $_SERVER['HTTP_X_REQUESTED_WITH'] === 'XMLHttpRequest';
     if ($isAjax) {
         return;
     }
     $version = Gravel::$version;
     $elapsed = number_format(microtime(true) - Gravel::$startTime, 5);
     echo PHP_EOL . "<!-- Generated in " . $elapsed . " seconds -->";
     echo PHP_EOL . "<!-- Gravel PHP framework {$version} -->";
 }
Exemplo n.º 15
0
 /**
  * Downloads a page with cURL, converts it to XHTML with Tidy and loads it
  * into $this->document, with an XPath helper in $this->xpath.
  *
  * URIs that do not start with "http:" or "https:" are prefixed with the
  * configured base href. Also records the cURL transfer info in
  * $this->request_info and the final URI in $this->location.
  *
  * @param $uri Absolute URI, or a path relative to the base href
  */
 protected function loadHtml($uri)
 {
     if (preg_match('/^https?:/i', $uri) === 0) {
         $uri = $this->config->getBaseHref() . $uri;
     }
     $this->location = $uri;

     $handle = curl_init($uri);
     curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
     $html = curl_exec($handle);
     $this->request_info = curl_getinfo($handle);
     curl_close($handle);

     $tidyOptions = array('output-xhtml' => true, 'char-encoding' => 'utf8', 'numeric-entities' => true);
     $tidy = new Tidy();
     $tidy->parseString($html, $tidyOptions, 'utf8');
     $tidy->cleanRepair();

     $document = new DOMDocument();
     $this->document = $document;
     $document->resolveExternals = true;
     $document->loadXml($tidy);

     $xpath = new DOMXPath($document);
     $this->xpath = $xpath;
     $xpath->registerNamespace('atom', 'http://www.w3.org/2005/Atom');
     $xpath->registerNamespace('html', 'http://www.w3.org/1999/xhtml');
 }
Exemplo n.º 16
0
 /**
  * Parses the input as XML with Tidy (indented, wrapped at 200 columns) and
  * returns the Tidy object itself; cleanRepair() is not called here.
  *
  * @param $in Markup to parse
  * @return \Tidy
  */
 private static function tidyit($in)
 {
     $options = ['indent' => true, 'input-xml' => true, 'wrap' => 200];
     $document = new \Tidy();
     $document->parseString($in, $options, 'utf8');
     return $document;
 }
Exemplo n.º 17
0
    $response['code'] = "N/A";
    $response['message'] = "";
    //use php's filter to check for a valid url
    if (!filter_var($_POST['url'], FILTER_VALIDATE_URL) === false) {
        $url = $_POST['url'];
        $curl = new MyCurl($url);
        $curl->createCurl();
        $response['code'] = $curl->getHttpStatus();
        $response['message'] = HttpCodes::getType($response['code']);
        $html = $curl->__toString();
        if (!is_string($html)) {
            $response['message'] = "Page Could not be loaded, check the domain. Nothing was returned.";
        } else {
            $tidy = new Tidy();
            //load page into tidy object, set options, and clean html
            $tidy->parseString($html, array('indent' => 2, 'output-xhtml' => true));
            $tidy->cleanRepair();
            //html is now nicely indented
            $html = (string) $tidy;
            //count the tags and get the result in a $tag => $count array
            $tagCount = countTags($html);
            $response['tagCount'] = $tagCount;
            $response['html'] = htmlentities($html);
        }
    } else {
        $response['message'] = $_POST['url'] . " is not a valid URL";
    }
    header('Content-Type: application/json');
    echo json_encode($response);
} else {
    //load the base view, located at ../views/base.php
Exemplo n.º 18
0
 /**
  * Serialises an array of tokens into an HTML string.
  * @param $tokens Array of HTMLPurifier_Token
  * @param $config HTMLPurifier_Config object; a default one is created when falsy
  * @param $context Taken by reference; not used in this method's body
  * @return Generated HTML
  */
 function generateFromTokens($tokens, $config, &$context)
 {
     $html = '';
     if (!$config) {
         $config = HTMLPurifier_Config::createDefault();
     }
     // Generator state is refreshed from the configuration on every call,
     // even when there are no tokens to render.
     $this->_scriptFix = $config->get('Output', 'CommentScriptContents');
     $this->_def = $config->getHTMLDefinition();
     $this->_xhtml = $this->_def->doctype->xml;
     if (!$tokens) {
         return '';
     }
     $size = count($tokens);
     $i = 0;
     while ($i < $size) {
         // Script special case: start tag, ONE content token, end tag.
         // Multi-token script contents are deliberately not handled.
         $isScriptBlock = $this->_scriptFix
             && $tokens[$i]->name === 'script'
             && $i + 2 < $size
             && $tokens[$i + 2]->type == 'end';
         if ($isScriptBlock) {
             $html .= $this->generateFromToken($tokens[$i]);
             $html .= $this->generateScriptFromToken($tokens[$i + 1]);
             $i += 2;
         }
         $html .= $this->generateFromToken($tokens[$i]);
         $i++;
     }
     if ($config->get('Output', 'TidyFormat') && extension_loaded('tidy')) {
         $tidy_options = array('indent' => true, 'output-xhtml' => $this->_xhtml, 'show-body-only' => true, 'indent-spaces' => 2, 'wrap' => 68);
         if (!version_compare(PHP_VERSION, '5', '<')) {
             // PHP 5 and later: object interface
             $tidy = new Tidy();
             $tidy->parseString($html, $tidy_options, 'utf8');
             $tidy->cleanRepair();
             $html = (string) $tidy;
         } else {
             // Older PHP: procedural interface
             tidy_set_encoding('utf8');
             foreach ($tidy_options as $option => $setting) {
                 tidy_setopt($option, $setting);
             }
             tidy_parse_string($html);
             tidy_clean_repair();
             $html = tidy_get_output();
         }
     }
     // Convert newlines to the configured value, or the system default
     $nl = $config->get('Output', 'Newline');
     if ($nl === null) {
         $nl = PHP_EOL;
     }
     return str_replace("\n", $nl, $html);
 }
Exemplo n.º 19
0
/**
 * Converts HTML tag soup to XHTML with Tidy.
 *
 * Every '&nbsp;' entity in the tidied output is replaced by its numeric
 * equivalent '&#160;', which an XML parser accepts without a declaration.
 *
 * @param $htmlTagSoup HTML to convert
 * @return The tidied XHTML as a string
 */
function tidyToXml($htmlTagSoup)
{
    // Parse with conversion to XHTML switched on, then repair
    $cleaner = new Tidy();
    $cleaner->parseString($htmlTagSoup, array('output-xhtml' => true));
    $cleaner->cleanRepair();
    // Tidy leaves '&nbsp;' alone; swap it for the numeric form
    return str_replace('&nbsp;', '&#160;', tidy_get_output($cleaner));
}
Exemplo n.º 20
0
 /**
  * The filter must produce exactly what Tidy itself produces for the same
  * input, configuration and encoding.
  *
  * @dataProvider filterProvider
  * @covers Robo47_Filter_Tidy::filter
  */
 public function testFilter($code)
 {
     $filter = new Robo47_Filter_Tidy();
     $filtered = $filter->filter($code);
     // Reference result straight from Tidy
     $reference = new Tidy();
     $reference->parseString($code, $filter->getConfig(), $filter->getEncoding());
     $reference->cleanRepair();
     $expected = (string) $reference;
     $this->assertEquals($expected, $filtered, 'Filter output missmatches direct tidy-output');
 }
Exemplo n.º 21
0
 /**
  * Cleans an HTML fragment with Tidy and returns the body markup only.
  *
  * Empty content yields an empty string. Otherwise the fragment is parsed
  * with the options below (no doctype, body only, comments and proprietary
  * markup dropped) and repaired.
  *
  * @param $content HTML fragment to clean
  * @return string The cleaned markup
  */
 private function clean($content)
 {
     if (!$content) {
         return '';
     }
     $tidy = new \Tidy();
     $tidy->parseString($content, ['indent' => true, 'doctype' => 'omit', 'output-html' => true, 'show-body-only' => true, 'drop-empty-paras' => true, 'drop-font-tags' => true, 'drop-proprietary-attributes' => true, 'hide-comments' => true, 'logical-emphasis' => true]);
     $tidy->cleanRepair();
     // Return the tidied markup; returning $content here would throw the
     // whole clean-up away.
     return (string) $tidy;
 }
Exemplo n.º 22
0
<?php

// Fetches a page, repairs it with Tidy, and collects the text and first link
// of every <h2> element into $eventsHeaders. $start and $finish bracket the
// whole run.
$start = microtime(true);
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, 'http://thinkphp.com.ua/');
// we want to pretend the Googlebot
curl_setopt($ch, CURLOPT_USERAGENT, 'Googlebot/2.1 (+http://www.google.com/bot.html)');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
$content = curl_exec($ch);
// curl_exec() returns false on failure; stop with a clear message instead of
// feeding false into Tidy.
if ($content === false) {
    throw new RuntimeException('Unable to fetch the page: ' . curl_error($ch));
}
// fix the document, as it's xml
$tidy = new Tidy();
$tidy->parseString($content, ['input-xml' => true, 'output-xml' => true, 'wrap' => false], 'utf8');
$tidy->cleanRepair();
$content = (string) $tidy;
// load the string as simplexml object
$xml = simplexml_load_string($content);
// simplexml_load_string() returns false when the markup is not well-formed;
// calling methods on that result would be a fatal error.
if ($xml === false) {
    throw new RuntimeException('Unable to parse the fetched page as XML.');
}
// registering the namespace, so we can search
$xml->registerXPathNamespace('xmlns', 'http://www.w3.org/1999/xhtml');
$eventsHeaders = [];
foreach ($xml->xpath('//xmlns:h2') as $node) {
    // remove the span child if present
    unset($node->span);
    // if the href is there, let's parse it
    if (isset($node->a['href'])) {
        $link = (string) $node->a['href'];
    } else {
        $link = null;
    }
    $eventsHeaders[] = ['title' => trim(strip_tags($node->asXml())), 'link' => $link];
}
$finish = microtime(true);
Exemplo n.º 23
0
/**
 * Post-processes the page buffer: optional Tidy prettifying, session/debug
 * URL rewriting, load statistics, SEF rewriting and replacement of the
 * load-time placeholder.
 *
 * @param $buffer The page output
 * @return The processed output
 */
function ob_sessrewrite($buffer)
{
    global $scripturl, $modSettings, $context, $user_info, $txt, $time_start, $db_count;

    /*
     * Tidy is a debugging aid for prettified output. It only runs for an
     * admin, when 'tidyup' is in the request and 'xml' is not (tidy can be
     * slow). Pretty HTML output might help with debugging templates.
     */
    $prettify = isset($_REQUEST['tidyup']) && !isset($_REQUEST['xml']) && class_exists('Tidy') && $user_info['is_admin'];
    if ($prettify) {
        $tidy_config = array('indent' => true, 'output-html' => true, 'wrap' => 0, 'merge-divs' => false, 'merge-spans' => false);
        $tidy = new Tidy();
        $tidy->parseString($buffer, $tidy_config, 'utf8');
        $buffer = $tidy;
    }

    // If $scripturl is set to nothing, or the SID is not defined (SSI?) just quit.
    if ($scripturl == '' || !defined('SID')) {
        return $buffer;
    }

    // rewrite urls with PHPSESSID, but only if the session isn't cookied and NOT for spiders
    if (empty($_COOKIE) && SID != '' && empty($context['browser']['possibly_robot'])) {
        $pattern = '/"' . preg_quote($scripturl, '/') . '(?!\\?' . preg_quote(SID, '/') . ')\\??/';
        $buffer = preg_replace($pattern, '"' . $scripturl . '?' . SID . '&amp;', $buffer);
    } elseif (isset($_GET['debug'])) {
        $pattern = '/(?<!<link rel="canonical" href=)"' . preg_quote($scripturl, '/') . '\\??/';
        $buffer = preg_replace($pattern, '"' . $scripturl . '?debug;', $buffer);
    }

    // microtime() strings look like "msec sec"; summing both parts gives seconds
    $toSeconds = function ($stamp) {
        return array_sum(explode(' ', $stamp));
    };
    $finished = $toSeconds(microtime());
    $context['load_time'] = round($finished - $toSeconds($time_start), 3);
    $context['load_queries'] = $db_count;
    $context['template_benchmark_time'] = round($finished - $toSeconds($context['template_benchmark']), 3);

    if (!empty($modSettings['simplesef_enable'])) {
        if (isset($context['sef_full_rewrite'])) {
            $buffer = SimpleSEF::ob_simplesef($buffer);
        } else {
            $buffer = SimpleSEF::ob_simplesef_light($buffer);
        }
    }

    $templateLabel = EoS_Smarty::isActive() ? 's template-smarty), ' : 's template), ';
    // Only admins get the statistics; everybody else gets an empty string
    $stats = '';
    if ($user_info['is_admin']) {
        $stats = $context['load_time'] . 's CPU (' . $context['template_benchmark_time'] . $templateLabel . $context['load_queries'] . ' ' . $txt['queries'] . SimpleSEF::getPerfData();
    }
    $buffer = str_replace('@%%__loadtime__%%@', $stats, $buffer);

    if (isset($_REQUEST['xml'])) {
        $buffer = ltrim($buffer);
    }
    return $buffer;
}
Exemplo n.º 24
0
    /**
     * Renders the template to a string.
     *
     * Registers the 'jQuery', 'bootstrap' and 'time' values, expands
     * $this->str through parseVal() and, when Config\Main::$tidyEnabled is
     * set, repairs the result with Tidy (wrapped at 160 columns).
     *
     * NOTE(review): microtime() is called without `true`, so it returns a
     * "msec sec" string rather than a float - confirm that the elapsed time
     * stored in $this->time is what is intended.
     *
     * @return string
     */
    function __toString()
    {
        $this->set('jQuery', '<script src="http://ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js"></script>');
        $this->set('bootstrap', '<link href="http://getbootstrap.com/dist/css/bootstrap.min.css" rel="stylesheet">
<script src="http://getbootstrap.com/dist/js/bootstrap.min.js"></script>');
        $this->time = round(microtime() - $this->time, 4);
        $this->set("time", $this->time);
        $rendered = $this->parseVal($this->str);
        if (!Config\Main::$tidyEnabled) {
            return (string) $rendered;
        }
        $tidy = new \Tidy();
        $tidy->parseString($rendered, ["wrap" => 160]);
        $tidy->cleanRepair();
        return (string) $tidy;
    }
Exemplo n.º 25
0
 /**
 		Clean and repair HTML with Tidy, using the options in the TIDY
 		variable and the ENCODING variable with its dashes removed; the
 		input is returned unchanged when the tidy extension is not loaded
 			@return string
 			@param $html string
 			@public
 	**/
 static function tidy($html)
 {
     if (extension_loaded('tidy')) {
         // Dashes are stripped from the encoding name (e.g. "utf-8" becomes "utf8")
         $encoding = str_replace('-', '', self::$vars['ENCODING']);
         $document = new Tidy();
         $document->parseString($html, self::$vars['TIDY'], $encoding);
         $document->cleanRepair();
         return (string) $document;
     }
     return $html;
 }
Exemplo n.º 26
0
error_reporting(E_ALL);
//////////////// VARIABLES ///////////////////
// some, but not all options for tidy
// found on tidy.sourceforge.net/docs/quickref.html
// and tidy.sourceforge.net/docs/tidy_man.html
// '::' marks all as optional
$short_options = implode('', array(
    'o::', 'f::', 'm::', 'i::', 'w::', 'u::', 'c::',
    'b::', 'n::', 'e::', 'q::', 'v::', 'h::',
));
$long_options = array(
    // markup
    'add-xml-decl::', 'add-xml-space::', 'alt-text::', 'anchor-as-name::',
    'assume-xml-procins::', 'bare::', 'clean::', 'css-prefix::',
    'decorate-inferred-ul::', 'doctype DocType auto::', 'drop-empty-paras::',
    'drop-font-tags::', 'drop-proprietary-attributes::', 'enclose-block-text::',
    'enclose-text::', 'escape-cdata::', 'fix-backslash::', 'fix-bad-comments::',
    'fix-uri::', 'hide-comments::', 'hide-endtags::', 'indent-cdata::',
    'input-xml::', 'join-classes::', 'join-styles::', 'literal-attributes::',
    'logical-emphasis::', 'lower-literals::', 'merge-divs::', 'merge-spans::',
    'ncr::', 'new-blocklevel-tags::', 'new-empty-tags::', 'new-inline-tags::',
    'new-pre-tags::', 'numeric-entities::', 'output-html::', 'output-xhtml::',
    'output-xml::', 'preserve-entities::', 'quote-ampersand::', 'quote-marks::',
    'quote-nbsp::', 'repeated-attributes::', 'replace-color::', 'show-body-only::',
    'uppercase-attributes::', 'uppercase-tags::', 'word-2000::',
    // pretty printing
    'break-before-br::', 'indent::', 'indent-attributes::', 'indent-spaces::',
    'markup::', 'punctuation-wrap::', 'sort-attributes::', 'split::',
    'tab-size::', 'vertical-space::', 'wrap::', 'wrap-asp::',
    'wrap-attributes::', 'wrap-jste::', 'wrap-php::', 'wrap-script-literals::',
    'wrap-sections::',
    // character encoding
    'ascii-chars::', 'char-encoding::', 'input-encoding::', 'language::',
    'newline::', 'output-bom::', 'output-encoding::',
    // miscellaneous
    'error-file::', 'force-output::', 'gnu-emacs::', 'gnu-emacs-file::',
    'keep-time::', 'output-file::', 'tidy-mark::', 'write-back::',
);
///////////// PROCEDURES ////////////////
// Environment checks: each failure is reported on STDERR with exit status 1.
if (!version_compare(PHP_VERSION, "5.2", ">=")) {
    fwrite(STDERR, "Error: tidy.php requires PHP 5.2 or newer.\n");
    exit(1);
}
$hasTidy = class_exists('Tidyx') || class_exists('Tidy');
if (!$hasTidy) {
    fwrite(STDERR, "Error: tidy.php requires PHP 5.2 with libtidy support built in.\n");
    exit(1);
}
// Command line switches (parsed with the lists above) become the Tidy
// configuration; the markup to process is read from standard input.
$arguments = getopt($short_options, $long_options);
$input = stream_get_contents(STDIN);
try {
    $tidy = new Tidy();
    $tidy->parseString($input, $arguments, 'utf8');
} catch (Exception $e) {
    fwrite(STDERR, "Error: PHP doesn't have libtidy installed.\n");
    exit(1);
}
// Markup goes to STDOUT, Tidy's diagnostics (if any) to STDERR.
fwrite(STDOUT, (string) $tidy);
$diagnostics = $tidy->errorBuffer;
if ($diagnostics) {
    fwrite(STDERR, $diagnostics);
}
 /**
  * Serialises an array of tokens into an HTML string, optionally formatted
  * by Tidy ('Output.TidyFormat') and with newlines converted when
  * 'Core.NormalizeNewlines' is enabled.
  * @param $tokens Array of HTMLPurifier_Token
  * @return Generated HTML; an empty string when $tokens is empty
  */
 public function generateFromTokens($tokens)
 {
     if (!$tokens) {
         return '';
     }
     $html = '';
     $size = count($tokens);
     $i = 0;
     while ($i < $size) {
         // Script special case: start tag, single content token, end tag
         $isScriptBlock = $this->_scriptFix
             && $tokens[$i]->name === 'script'
             && $i + 2 < $size
             && $tokens[$i + 2] instanceof HTMLPurifier_Token_End;
         if ($isScriptBlock) {
             $html .= $this->generateFromToken($tokens[$i]);
             $html .= $this->generateScriptFromToken($tokens[$i + 1]);
             $i += 2;
         }
         $html .= $this->generateFromToken($tokens[$i]);
         $i++;
     }
     if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) {
         $tidyOptions = array('indent' => true, 'output-xhtml' => $this->_xhtml, 'show-body-only' => true, 'indent-spaces' => 2, 'wrap' => 68);
         $tidy = new Tidy();
         $tidy->parseString($html, $tidyOptions, 'utf8');
         $tidy->cleanRepair();
         $html = (string) $tidy;
     }
     if (!$this->config->get('Core.NormalizeNewlines')) {
         return $html;
     }
     // Configured newline, falling back to the system default
     $nl = $this->config->get('Output.Newline');
     if ($nl === null) {
         $nl = PHP_EOL;
     }
     return $nl === "\n" ? $html : str_replace("\n", $nl, $html);
 }
Exemplo n.º 28
0
/**
 * Custom function that formats a string of HTML using Tidy.
 *
 * Nothing is changed unless the Tidy class exists and the 'unl_tidy'
 * variable is truthy. On success the Tidy object is returned; when the
 * repair fails the (possibly prefixed) input string is returned.
 *
 * @param string $string
 */
function unl_tidy($string)
{
    if (!class_exists('Tidy') || !variable_get('unl_tidy')) {
        return $string;
    }
    $tidy = new Tidy();
    // Tidy options: http://tidy.sourceforge.net/docs/quickref.html
    $options = array('doctype' => 'omit', 'new-blocklevel-tags' => 'article,aside,header,footer,section,nav,hgroup,address,figure,figcaption,output', 'new-inline-tags' => 'video,audio,canvas,ruby,rt,rp,time,code,kbd,samp,var,mark,bdi,bdo,wbr,details,datalist,source,summary', 'output-xhtml' => true, 'show-body-only' => true, 'indent' => true, 'indent-spaces' => 2, 'vertical-space' => false, 'wrap' => 140, 'wrap-attributes' => false, 'force-output' => true, 'quiet' => true, 'tidy-mark' => false);
    // Add &nbsp; to prevent Tidy from removing a script or comment that is
    // the first thing in the string
    $trimmed = trim($string);
    $startsWithScript = strtolower(substr($trimmed, 0, 7)) == '<script';
    if ($startsWithScript || substr($trimmed, 0, 4) == '<!--') {
        // The explanatory comment is only added once
        $statement = substr($trimmed, 0, 9) !== '<!-- Tidy'
            ? "<!-- Tidy: Start field with something other than script or comment to remove this -->\n"
            : '';
        $string = "&nbsp;" . $statement . $string;
    }
    $tidy->parseString($string, $options, 'utf8');
    return $tidy->cleanRepair() ? $tidy : $string;
}
Exemplo n.º 29
0
 /**
  * Parses the markup with Tidy and replaces $source, in place, with the
  * value of the resulting <body> node.
  *
  * @param $source Markup to clean; overwritten with the result
  */
 protected function cleanup(&$source)
 {
     $options = array('drop-font-tags' => true, 'drop-proprietary-attributes' => true, 'hide-comments' => true, 'numeric-entities' => true, 'output-xhtml' => true, 'wrap' => 0, 'bare' => true, 'word-2000' => true, 'new-blocklevel-tags' => 'section nav article aside hgroup header footer figure figcaption ruby video audio canvas details datagrid summary menu', 'new-inline-tags' => 'time mark rt rp output progress meter', 'new-empty-tags' => 'wbr source keygen command');
     $document = new Tidy();
     $document->parseString($source, $options, 'utf8');
     $body = $document->body();
     $source = $body->value;
 }
 /**
  * Re-indents the markup with Tidy and joins a closing tag that follows
  * directly after a line break back onto the preceding tag. The input is
  * returned unchanged when the tidy class is not available.
  *
  * @param $html Markup to indent
  * @return The indented markup
  */
 public function htmlIndentation($html)
 {
     if (!class_exists('tidy')) {
         return $html;
     }
     $options = array('char-encoding' => 'utf8', 'vertical-space' => false, 'indent' => true, 'wrap' => 0, 'word-2000' => 1, 'break-before-br' => true, 'indent-cdata' => true);
     $tidy = new \Tidy();
     $tidy->parseString($html, $options);
     $indented = tidy_get_output($tidy);
     return str_replace('>' . PHP_EOL . '</', '></', $indented);
 }