예제 #1
0
파일: Page.php 프로젝트: broozer/psa
 /**
  * Flushes the buffered page when the object is destroyed.
  *
  * Appends the closing body/html tags to the shared output buffer and then
  * either echoes the buffer unchanged or, when the debug flag is set, prints
  * the Tidy diagnostics followed by an escaped, line-numbered dump of the
  * markup. The buffer is emptied afterwards. When the html or head flag is
  * not set, the PageException message is echoed instead.
  */
 public function __destruct()
 {
     try {
         if (!self::$html_set || !self::$head_set) {
             throw new PageException("<b>HTML class exception.</b><br />Either &lt;html&gt; or &lt;head&gt; or &lt;body&gt; is not set.</b><br />\n\t\t\t\t\tAll these tags need to be used in order to generate valid html forms.");
         }
         self::$output .= "</body>\n</html>";

         if (!self::$debug) {
             echo self::$output;
             self::$output = '';
             return;
         }

         echo '<b>Tidy messages</b><br />';
         $tidy = tidy_parse_string(self::$output);
         echo nl2br(htmlentities(tidy_get_error_buffer($tidy)));
         echo '<hr />';

         // Escape the markup, then let nl2br() add a real line break per source line.
         $linedump = explode("\n", nl2br(htmlentities(str_replace("<br />", "\n", self::$output))));
         foreach ($linedump as $i => $line) {
             // nl2br() emits "<br />", so both spellings must be stripped
             // before deciding that a line is blank and can be skipped.
             if (trim(str_replace(array('<br />', '<br>'), '', $line)) === '') {
                 continue;
             }
             $j = $i + 1;
             // Pad on the width of the number actually printed, and never
             // pass a negative count to str_repeat() for very long dumps.
             $padding = max(0, 4 - strlen((string) $j));
             echo str_repeat("&nbsp;", $padding) . $j . ' : ' . $line;
         }
         echo '<hr />';
         self::$output = '';
     } catch (PageException $e) {
         echo $e->getMessage();
     }
 }
예제 #2
0
/**
 * Repairs an HTML fragment with Tidy and returns it as XHTML.
 *
 * Only the body contents are produced ('show-body-only').
 *
 * @param string $html Markup to clean; parsed as UTF-8.
 * @return string Output of tidy_get_output() after the repair pass.
 */
function tidy_html($html)
{
    $document = tidy_parse_string(
        $html,
        array('output-xhtml' => true, 'show-body-only' => true),
        'UTF8'
    );
    $document->cleanRepair();

    return tidy_get_output($document);
}
예제 #3
0
 /**
  * Replaces the view's rendered content with its Tidy-repaired XHTML form.
  *
  * @param mixed  $event Unused by this handler.
  * @param object $view  Object exposing getContent() and setContent().
  */
 public function afterRender($event, $view)
 {
     $options = array(
         'clean' => true,
         'output-xhtml' => true,
         'show-body-only' => true,
         'wrap' => 0,
     );

     $document = tidy_parse_string($view->getContent(), $options, 'UTF8');
     $document->cleanRepair();

     $view->setContent((string) $document);
 }
예제 #4
0
 /**
  * Runs the given markup through Tidy and returns the repaired XHTML document.
  *
  * @param string $content Markup to repair; parsed as UTF-8.
  * @return string String form of the repaired tidy document.
  */
 protected function tidy($content)
 {
     $document = tidy_parse_string($content, array('output-xhtml' => true), 'utf8');
     $document->cleanRepair();

     return (string) $document;
 }
예제 #5
0
/**
 * Collects the request description stored in a url element into an array.
 *
 * Reads the first get, post, xpath and data child elements and the src,
 * referer and method attributes. When the tidy extension is loaded the data
 * text is parsed and diagnosed; its error count and error buffer are added
 * to the result only if $use_tidy is true. Without the extension a
 * translated "not present" message is stored instead.
 *
 * @param object $url      Element exposing getElementsByTagName() and getAttribute() (presumably a DOMElement)
 * @param bool   $use_tidy Whether to report the Tidy diagnostics in the result
 * @return array Keys: data, [ref_error_count], [ref_error_msg], url, xpath, method, post, get, referer
 */
function get_url($url, $use_tidy = TRUE)
{
    global $cookies;
    $smarty = TikiLib::lib('smarty');

    $get = get_from_dom($url->getElementsByTagName('get')->item(0));
    $post = get_from_dom($url->getElementsByTagName('post')->item(0));
    $xpath = $url->getElementsByTagName('xpath')->item(0)->textContent;
    $data = $url->getElementsByTagName('data')->item(0)->textContent;

    // The raw data text is reported; the parsed tidy document is only used for diagnostics.
    $result = array('data' => $data);

    if (!extension_loaded("tidy")) {
        $result['ref_error_msg'] = tra("Tidy Extension not present");
    } else {
        $parsed = tidy_parse_string($data, array(), 'utf8');
        tidy_diagnose($parsed);
        if ($use_tidy) {
            $result['ref_error_count'] = tidy_error_count($parsed);
            $result['ref_error_msg'] = tidy_get_error_buffer($parsed);
        }
    }

    $result['url'] = $url->getAttribute("src");
    $result['xpath'] = $xpath;
    $result['method'] = $url->getAttribute("method");
    $result['post'] = $post;
    $result['get'] = $get;
    $result['referer'] = $url->getAttribute("referer");

    return $result;
}
예제 #6
0
 /**
  * Filters a content item's field, or a plain string, for output.
  *
  * For a Zoo_Content_Interface item the named field is read and the item's
  * filters are looked up and applied in turn; the result is then repaired
  * by Tidy when that extension is loaded. Anything without filters is
  * escaped with htmlspecialchars() instead.
  *
  * @param mixed  $item   Content item or raw text
  * @param string $field  Field to read from a content item
  * @param int    $length When greater than zero, the text is cut to this many bytes first
  * @return string
  */
 function filter($item, $field = "content", $length = 0)
 {
     $isContentItem = $item instanceof Zoo_Content_Interface;
     if ($isContentItem) {
         $txt = $item->{$field};
         $nodefilters = Zoo::getService('content')->getFilters($item);
     } else {
         $txt = $item;
         $nodefilters = array();
     }

     if ($length > 0) {
         $txt = substr($txt, 0, $length);
     }

     // No filters attached: plain escaping is all that happens.
     if (!count($nodefilters)) {
         return htmlspecialchars($txt);
     }

     $filterIds = array();
     foreach ($nodefilters as $attached) {
         $filterIds[] = $attached->filter_id;
     }
     foreach (Zoo::getService('filter')->getFilters($filterIds) as $filter) {
         $txt = $filter->filter($txt);
     }

     if (extension_loaded('tidy')) {
         $options = array('indent' => TRUE, 'show-body-only' => TRUE, 'output-xhtml' => TRUE, 'wrap' => 0);
         $document = tidy_parse_string($txt, $options, 'UTF8');
         $document->cleanRepair();
         $txt = tidy_get_output($document);
     }

     return $txt;
 }
예제 #7
0
 /**
  * Cleans an HTML fragment: normalises it, parses it with Tidy and rebuilds
  * the output from the body's child nodes via _filterNode().
  *
  * @param string $html     Markup to clean
  * @param string $encoding Encoding of $html; anything other than 'utf-8' is converted to UTF-8 and back
  * @return string|false The rebuilt markup, or false when $html is empty
  */
 public function tidy($html, $encoding = 'utf-8')
 {
     if ($html == '') {
         return false;
     }

     $needsConversion = $encoding !== 'utf-8';
     $source = trim($html);

     // Handle input that is not UTF-8
     if ($needsConversion) {
         $source = BaseModelCommon::convertEncoding($source, 'utf-8', $encoding);
     }
     // Strip all /* ... */ comment content
     $source = preg_replace("|\\/\\*(.*)\\*\\/|sU", "", $source);
     // Drop CDATA wrappers, keeping what they contain
     $source = preg_replace("/<!\\[CDATA\\[(.*?)\\]\\]>/is", "\\1", $source);
     // Escape Unicode characters
     $source = $this->_escapeUnicode($source);

     $options = array('output-xhtml' => true, 'show-body-only' => true, 'join-classes' => true);
     $source = str_replace("&", "&amp;", $source);
     $body = tidy_parse_string($source, $options, 'utf8')->body();

     // _filterNode() presumably appends to its second argument by reference - confirm.
     $collected = '';
     if ($body->child) {
         foreach ($body->child as $node) {
             $this->_filterNode($node, $collected);
         }
     }

     // Undo the Unicode escaping
     $result = $this->_unEscapeUnicode($collected);
     if ($needsConversion) {
         $result = BaseModelCommon::convertEncoding($result, $encoding, 'utf-8');
     }

     return $this->_insertVideo($result);
 }
예제 #8
0
파일: tidyPQ.php 프로젝트: richthegeek/Misc
 /**
  * Repairs the markup with Tidy and loads it as the only phpQuery document.
  *
  * @param string $html Markup to load
  * @return mixed Whatever phpQuery::newDocumentHTML() returns for the repaired markup
  */
 function load_html($html)
 {
     $document = tidy_parse_string($html);
     tidy_clean_repair($document);
     $repaired = tidy_get_html($document);

     // Drop previously loaded documents before creating the new one.
     phpQuery::unloadDocuments();

     return phpQuery::newDocumentHTML($repaired);
 }
예제 #9
0
 /**
  * Repairs the markup with Tidy and returns the body's children joined
  * into a single line.
  *
  * @param string $html     Markup to clean
  * @param string $encoding Tidy encoding name used for parsing
  * @return string Concatenated body child nodes with PHP_EOL removed
  */
 public function cleanHtml($html, $encoding = 'utf8')
 {
     // Pass the caller's encoding through as-is; assigning inside the
     // argument list would silently force every call to 'utf8'.
     $tidy = tidy_parse_string($html, $this->options(), $encoding);
     $tidy->cleanRepair();

     // body() may yield no node and child is null for an empty body.
     $body = $tidy->body();
     $children = ($body !== null && $body->child) ? $body->child : [];

     return str_replace(PHP_EOL, '', implode('', $children));
 }
예제 #10
0
 /**
  * Parses the markup with Tidy and flattens its body node into
  * newline-separated text via toArray().
  *
  * @param string $html Markup to prepare; parsed as UTF-8
  * @return string Entries returned by toArray(), joined with "\n"
  */
 public function prepareHtmlInput($html)
 {
     $options = array('wrap' => false, 'show-body-only' => true);
     $bodyNode = tidy_parse_string($html, $options, 'utf8')->body();

     return implode("\n", $this->toArray($bodyNode));
 }
예제 #11
0
파일: Image.php 프로젝트: alcodo/alpaca
 /**
  * Builds an indented, repaired version of the markup.
  *
  * The repaired tidy document is also stored in $this->htmlOutput.
  *
  * @param string $html Markup to prettify; parsed as UTF-8
  * @return mixed The tidy document object (not a plain string)
  */
 public function getPrettyHtml($html)
 {
     // TODO without tidy support
     $options = [
         'show-body-only' => true,
         'indent' => true,
         'output-html' => true,
         'wrap' => 200,
     ];

     $document = tidy_parse_string($html, $options, 'UTF8');
     $document->cleanRepair();

     return $this->htmlOutput = $document;
 }
예제 #12
0
 /**
  * Repairs the markup with Tidy when the extension is available.
  *
  * @param string $html Markup to repair; parsed as UTF-8
  * @return string The repaired document's value, or $html unchanged when tidy_parse_string() does not exist
  */
 private function tidy($html)
 {
     if (!function_exists('tidy_parse_string')) {
         return $html;
     }

     $document = tidy_parse_string($html, array(), 'UTF8');
     $document->cleanRepair();

     return $document->value;
 }
예제 #13
0
 /**
  *    Builds a page from the response by parsing its content with HTML Tidy.
  *    The guarded content is parsed as latin1, the resulting tree is walked,
  *    labels are attached and the collected forms are set on the page before
  *    the parser state is freed.
  *    @param $response SimpleHttpResponse  Fetched response.
  *    @return SimplePage                   Newly parsed page.
  */
 function parse($response)
 {
     $this->page = new SimplePage($response);

     $guarded = $this->insertGuards($response->getContent());
     $options = array('output-xml' => false, 'wrap' => '0', 'indent' => 'no');
     $tidied = tidy_parse_string($guarded, $options, 'latin1');

     $this->walkTree($tidied->html());
     $this->attachLabels($this->widgets_by_id, $this->labels);
     $this->page->setForms($this->forms);

     // Keep a handle on the page: free() is called before returning it.
     $parsed = $this->page;
     $this->free();

     return $parsed;
 }
예제 #14
0
 /**
  * Loads an HTML file into $this->dom.
  *
  * The file content is passed through utf8_encode(), repaired by Tidy,
  * stripped of script and style tags via removeTags() and then parsed
  * into a DOMDocument with preserveWhiteSpace disabled.
  *
  * @param string $fileName Path of the file to read
  */
 public function __construct($fileName)
 {
     $raw = file_get_contents($fileName);

     $document = tidy_parse_string(utf8_encode($raw));
     $document->cleanRepair();

     $markup = $this->removeTags($document->html()->value, ["script", "style"]);

     $this->dom = new DOMDocument();
     $this->dom->preserveWhiteSpace = false;
     $this->dom->loadHTML($markup);
 }
예제 #15
0
 /**
  * Fills the template by plain string replacement and tidies the result.
  *
  * The three placeholders are substituted one after another, in the order
  * listed, so text inserted by an earlier one is visible to the later ones.
  * A full template engine could replace this if the scope grows.
  * @return  string
  */
 public function render()
 {
     $placeholders = ['{DESTINATION NAME}', '{CONTENT}', '{NAVIGATION}'];
     $values = [
         $this->destination->title,
         $this->destination->getBodyHtml(),
         $this->renderNavigation(),
     ];
     // str_replace() with arrays applies the pairs sequentially, left to right.
     $filled = str_replace($placeholders, $values, $this->template);

     // Clean up the html to make reviewing easier.
     $options = ['indent' => true, 'output-xhtml' => true, 'wrap' => 0];
     $document = tidy_parse_string($filled, $options, 'utf8');
     $document->cleanRepair();

     return (string) $document;
 }
 /**
  * Passes the generated output through Tidy and stores the repaired
  * document back on the grav context, unless the current URI path is
  * one that skipCurrentSite() says to skip.
  */
 public function onOutputGenerated()
 {
     $path = $this->grav['uri']->path();
     if ($this->skipCurrentSite($path)) {
         return;
     }

     $options = array(
         'indent' => $this->_getConfigSetting('indent'),
         'indent-spaces' => $this->_getConfigSetting('indent_spaces'),
         'wrap' => $this->_getConfigSetting('wrap'),
         'hide-comments' => $this->_getConfigSetting('hide_comments'),
         'new-blocklevel-tags' => implode(' ', $this->_getConfigSetting('blocklevel_tags')),
         'new-empty-tags' => implode(' ', $this->_getConfigSetting('empty_tags')),
         'new-inline-tags' => implode(' ', $this->_getConfigSetting('inline_tags')),
         'newline' => 'LF',
     );

     /** @var tidy $document */
     $document = tidy_parse_string($this->grav->output, $options, 'UTF8');
     $document->cleanRepair();

     // The tidy object itself (not a string cast) is assigned as the output.
     $this->grav->output = $document;
 }
예제 #17
0
파일: Tidy.php 프로젝트: niden/kolibri
 /**
  * Tidies the rendered view content when the tidy extension is loaded.
  *
  * After the repair pass the XHTML html opening tag is replaced by an
  * HTML5 doctype string and the newline Tidy puts before a closing
  * script tag is removed.
  *
  * @param mixed  $event Unused by this handler
  * @param object $view  Object exposing getContent() and setContent()
  */
 public function afterRender($event, $view)
 {
     if (!extension_loaded('tidy')) {
         return;
     }

     $options = array(
         'hide-comments' => true,
         'tidy-mark' => false,
         'indent' => true,
         'indent-spaces' => 4,
         'new-blocklevel-tags' => 'article,header,footer,section,nav',
         'new-inline-tags' => 'video,audio,canvas,ruby,rt,rp',
         'doctype' => '<!DOCTYPE HTML>',
         'sort-attributes' => 'alpha',
         'vertical-space' => false,
         'output-xhtml' => true,
         'wrap' => 150,
         'wrap-attributes' => false,
         'break-before-br' => false,
     );

     $document = tidy_parse_string($view->getContent(), $options, 'utf8');
     tidy_clean_repair($document);

     $htmlOpeners = array(
         '<html lang="en" xmlns="http://www.w3.org/1999/xhtml">',
         '<html xmlns="http://www.w3.org/1999/xhtml">',
     );
     $markup = str_replace($htmlOpeners, '<!DOCTYPE html>', (string) $document);
     $markup = str_replace(">\n</script>", "></script>", $markup);

     $view->setContent((string) $markup);
 }
예제 #18
0
 /**
  * Repairs the given content (or the rendered children) with Tidy.
  *
  * @param string $content  Markup to tidy; the rendered children are used when null
  * @param string $encoding Tidy encoding name used for parsing
  * @throws \RuntimeException When the hasTidy flag is not true
  * @return string
  */
 public function render($content = null, $encoding = 'utf8')
 {
     if (null === $content) {
         $content = $this->renderChildren();
     }

     if (true !== $this->hasTidy) {
         throw new \RuntimeException('TidyViewHelper requires the PHP extension "tidy" which is not installed or not loaded.', 1352059753);
     }

     $document = tidy_parse_string($content, [], $encoding);
     $document->cleanRepair();

     return (string) $document;
 }
예제 #19
0
파일: output.php 프로젝트: eodivision/eoCMS
/**
 * Assembles the complete page from themed parts, prints it and terminates the script.
 *
 * The page is built from theme('head'), an optional maintenance banner, the
 * top/links sections, the panels, any collected errors, the body and the
 * footer. When the tidy extension is loaded the markup is re-indented first.
 *
 * NOTE(review): $head is both a parameter and listed in the global statement;
 * the global declaration appears to rebind it to the global variable, so the
 * argument passed by the caller would be ignored - confirm.
 *
 * @param string $title Page title; replaced by 'Error' when the theme reports an output error
 * @param string $body  Page body; replaced by the theme's error output in that case
 * @param string $head  Extra head content handed to theme('head')
 * @return void Never returns: the function ends with die()
 */
function output($title = '', $body = '', $head = '')
{
    global $settings, $authid, $checkleft, $checkright, $head, $error, $error_die;
    // An output error from the theme replaces the body and suppresses the panels.
    if (theme('output_error') != false) {
        $body = theme('output_error');
        $title = 'Error';
        $panels = false;
        $lowerpanel = false;
        $panel = '';
        unset($error_die);
    } else {
        $panels = true;
    }
    //display panels
    if ($panels != false) {
        $panel = theme('displaypanels');
        $lowerpanel = theme('displaylowerpanel');
    }
    // Render the collected error messages, if any, as a themed list.
    if (isset($error) && !empty($error)) {
        $errors = '<br />' . theme('title', 'Error') . theme('start_content') . '<div class="errors"><ul>';
        foreach ($error as $error1) {
            $errors .= '<li>' . $error1 . '</li>';
        }
        $errors .= '</ul></div>' . theme('end_content');
        unset($error);
    } else {
        $errors = '';
    }
    // Append the page number to the title for every page after the first.
    if (isset($_GET['page']) && $_GET['page'] > 1) {
        $title = $title . ' - Page ' . $_GET['page'];
    }
    $output = theme('head', stripslashes($title), $head) . '<body>';
    if ($settings['maintenance_mode'] == 'on') {
        $output .= '<div class="titlebg">WARNING: Maintenance Mode is on</div>';
    }
    $output .= '<div id="container">
	' . theme('top') . theme('links');
    $output .= $panel;
    //display the data
    $output .= $errors . '<br />' . stripslashes($body);
    $output .= $lowerpanel . theme('footer');
    //SEO Friendly Links
    // The included file runs in this function's scope, so it can see the
    // local variables here (presumably it rewrites $output - confirm).
    include IN_PATH . '/functions/seofriendlyurls.php';
    //Check if the tidy library is installed
    if (extension_loaded('tidy')) {
        //yay it is, lets clean up all the HTML, so it looks all nice in View Source in your browser :)
        $options = array("indent" => true, 'wrap' => 0);
        // From here on $output holds the tidy document rather than a plain string.
        $output = tidy_parse_string($output, $options);
        tidy_clean_repair($output);
    }
    // Print the page and stop the script.
    die($output);
}
예제 #20
0
 /**
  * Repairs the given content (or the rendered children) with Tidy in XML mode.
  *
  * @param string $content Markup to tidy; the rendered children are used when NULL
  * @throws \RuntimeException When the hasTidy flag is not TRUE
  * @return string
  */
 public function render($content = NULL)
 {
     if (NULL === $content) {
         $content = $this->renderChildren();
     }

     if (TRUE !== $this->hasTidy) {
         throw new \RuntimeException('TidyViewHelper requires the PHP extension "tidy" which is not installed or not loaded.', 1352059753);
     }

     $options = array(
         'output-xml' => TRUE,
         'input-xml' => TRUE,
         'indent' => TRUE,
         'quote-nbsp' => FALSE,
         'input-encoding' => 'utf8',
         'output-encoding' => 'utf8',
         'char-encoding' => 'utf8',
     );
     $document = tidy_parse_string($content, $options);
     $document->cleanRepair();

     return (string) $document;
 }
 /**
  * Fetches the landing page to obtain a request token, then sends the
  * actual GET request with that token and the folder credentials.
  *
  * @param \Step\Api\TokenUser $I             Actor used to send the requests
  * @param string              $url           Target of the second request
  * @param array               $params        Extra query parameters; token and password are merged in
  * @param string              $acceptHeaders Accept header for the second request
  */
 private function connect(\Step\Api\TokenUser $I, $url, $params = [], $acceptHeaders = 'application/json, text/javascript, */*;q=0.01')
 {
     // First request: load "/" with the browser Accept header.
     $I->haveHttpHeader('Accept', $this->browserHeader);
     $I->sendGET('/');

     // The token is read from the data-requesttoken attribute of the head element.
     $landingPage = tidy_parse_string($I->grabResponse());
     $requestToken = $landingPage->head()->attribute['data-requesttoken'];

     $I->haveHttpHeader('Accept', $acceptHeaders);
     $I->haveHttpHeader('requesttoken', $requestToken);

     $credentials = [
         'token' => $this->folderMetaData['token'],
         'password' => $this->folderMetaData['password'],
     ];
     $I->sendGET($url, array_merge($params, $credentials));
 }
예제 #22
0
 /**
  * Normalises an HTML fragment to XHTML with Tidy.
  *
  * The fragment is wrapped in a complete XHTML page whose body is fenced
  * by two marker comments; after parsing, the part between the markers is
  * taken and the body tags are removed from it.
  *
  * @param string $code HTML fragment (not a complete page)
  * @return string The fragment as it appears in Tidy's output
  */
 public static function purifyHtml($code)
 {
     $pageStart = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">  <head>    <title>Just A Wrapper</title><meta http-equiv="content-type" content="text/html;charset=UTF-8"/>  </head> <!--wrapdelimiter--><body>';
     $pageEnd = ' </body><!--wrapdelimiter--></html>';

     $options = array('indent' => false, 'output-xhtml' => TRUE, 'wrap' => 0);
     $document = tidy_parse_string($pageStart . $code . $pageEnd, $options, 'UTF8');

     // Segment 1 is whatever sits between the two marker comments.
     $segments = explode("<!--wrapdelimiter-->", (string) $document);

     return str_replace(array("<body>", "</body>"), "", $segments[1]);
 }
예제 #23
0
 /**
  * tidyClean
  * Removes script and style elements from the markup and repairs the rest with Tidy.
  *
  * @param mixed $html    Markup to clean
  * @param array $options Tidy options; indent + show-body-only are used when empty
  * @access private
  * @return string The repaired markup, or $html untouched when $this->tidyAvailable is false
  */
 private function tidyClean($html, $options = array())
 {
     if (!$this->tidyAvailable) {
         return $html;
     }

     $stripPatterns = array(
         "'<script[^>]*?>.*?</script>'si",
         "'<style[^>]*?>.*?</style>'si",
     );
     $stripped = preg_replace($stripPatterns, "", $html);

     $effectiveOptions = empty($options)
         ? array('indent' => true, 'show-body-only' => true)
         : $options;

     $document = tidy_parse_string($stripped, $effectiveOptions, "utf8");
     $document->cleanRepair();

     return $document->value;
 }
예제 #24
0
/**
 * Turn a string or array into valid, standards-compliant (x)HTML
 *
 * Arrays are processed element by element, keeping their keys. Strings are
 * wrapped in html/body tags and handed to the first available backend: the
 * object-style tidy extension, the older procedural tidy extension (the one
 * that provides tidy_setopt), or the tidy executable named by TIDY_EXE.
 * When none is available a notice is triggered and the wrapped text is
 * returned as-is.
 *
 * Uses configuraton options in tidy.conf - which should minimally have show-body-only set to yes
 *
 * @param mixed $text The data to be tidied up
 * @return mixed $result Tidied data
 */
function tidy($text)
{
    static $tidy_funcs;
    static $tidy_conf;

    if (!isset($tidy_conf)) {
        $tidy_conf = SETTINGS_INC . 'tidy.conf';
    }

    // Arrays: recurse into every element.
    if (is_array($text)) {
        $result = array();
        foreach ($text as $key => $value) {
            $result[$key] = tidy($value);
        }
        return $result;
    }

    // determine what tidy libraries are available
    if (empty($tidy_funcs)) {
        $tidy_funcs = get_extension_funcs('tidy');
    }
    $extension_present = !empty($tidy_funcs);
    $has_setopt = $extension_present && in_array('tidy_setopt', $tidy_funcs);
    $tidy_1_lib_available = $has_setopt;
    $tidy_2_lib_available = $extension_present && !$has_setopt;
    $tidy_command_line_available = TIDY_EXE ? file_exists(TIDY_EXE) : false;

    $text = '<html><body>' . protect_string_from_tidy($text) . '</body></html>';

    if ($tidy_2_lib_available) {
        $tidy = new tidy();
        $tidy->parseString($text, $tidy_conf, 'utf8');
        $tidy->cleanRepair();
        // The object is converted to a string by trim() below.
        $result = $tidy;
    } elseif ($tidy_1_lib_available) {
        tidy_load_config($tidy_conf);
        tidy_set_encoding('utf8');
        tidy_parse_string($text);
        tidy_clean_repair();
        $result = tidy_get_output();
    } elseif ($tidy_command_line_available) {
        // Shell-escape the text and pipe it into the tidy executable,
        // discarding whatever tidy writes to stderr.
        $cmd = 'echo ' . escapeshellarg($text) . ' | ' . TIDY_EXE . ' -q -config ' . $tidy_conf . ' 2> /dev/null';
        $result = shell_exec($cmd);
    } else {
        trigger_error('tidy does not appear to be available within php or at the command line - no tidying is taking place.');
        $result = $text;
    }

    return trim($result);
}
 /**
  * Loads an HTML string
  *
  * The markup is normalised first: with Tidy when that extension is
  * available, otherwise by a round trip through DOMDocument. Whitespace
  * between tags is then removed and a <close> marker is inserted before
  * the closing body tag before the parent loader is called.
  *
  * @param string $str HTML text to load
  */
 function load_html($str)
 {
     $normalized = null;

     // A missing extension does not raise an Exception, so availability
     // has to be tested explicitly for the DOM fallback to be reachable.
     if (function_exists('tidy_parse_string')) {
         $tidy = tidy_parse_string($str);
         //$tidy->cleanRepair();
         $html = $tidy ? $tidy->html() : null;
         if ($html !== null) {
             $normalized = $html->value;
         }
     }

     if ($normalized === null) {
         // Tidy is unavailable or produced no html node: fall back to DOM.
         $doc = new DOMDocument();
         $doc->loadHTML($str);
         $normalized = @$doc->saveHTML();
     }

     $str = preg_replace('/>\\s*?</', '><', $normalized);
     $str = str_replace('</body>', '<close></body>', $str);
     parent::load_html($str);
 }
예제 #26
0
 /**
  * Scrapes every listing not yet marked as scraped.
  *
  * For each listing the page is fetched, the street address is read from
  * the map box, the address (or the neighborhood when there is none) is
  * geocoded, and the listing row is updated with street, description and
  * coordinates. Failures are logged per listing and do not stop the loop.
  */
 function perform()
 {
     $q = DB::query('SELECT link, neighborhood FROM listings WHERE scraped != TRUE', PDO::FETCH_ASSOC);
     $ps = DB::prepare('UPDATE listings SET scraped=TRUE, street=:street, description=:description, lat=:lat, lng=:lng WHERE link=:link');
     /*
     Guzzle::sendAll(array_map(function ($listing) {
       return Guzzle::createRequest('GET', 'http://newyork.craigslist.org' . $listing['link']);
     }, iterator_to_array($q)), ['complete' => function ($event) use($ps) {
       try {        
         $body = $event->getResponse()->getBody();
       
         $crawler = new Crawler($body);
         $readability = new Readability($body);
     
         $street = $crawler->filter('.mapAndAttrs > .mapbox > div.mapaddress');
       
         $ps->execute([
           ':link' => parse_url($event->getRequest()->getUrl())['path'],
           ':lat'  => null,
           ':lng'  => null,
           ':street' => $street->count() ? $street->text() : null,
           ':description' => $readability->init() ? trim(strip_tags(tidy_parse_string($readability->getContent()->innerHTML, [], 'UTF8'))) : null    
         ]);
       } catch (Exception $e) {
         Logger::error($e->getMessage(), $ps->errorinfo());
       }
     }]);
     */
     foreach ($q as $listing) {
         try {
             $body = Guzzle::get('http://newyork.craigslist.org' . $listing['link'])->getBody();
             $crawler = new Crawler($body);
             $readability = new Readability($body);
             $street = $crawler->filter('.mapAndAttrs > .mapbox > div.mapaddress');
             $streetText = $street->count() ? $street->text() : null;

             // The address is free text (spaces, '&', '#', ...), so it must be
             // URL-encoded before it is placed in the query string.
             $address = $streetText !== null ? $streetText : $listing['neighborhood'];
             $url = 'http://maps.googleapis.com/maps/api/geocode/json?address=' . urlencode((string) $address);
             $json = json_decode(Guzzle::get($url)->getBody(), true);
             $loc = isset($json['results'][0]) ? $json['results'][0]['geometry']['location'] : null;

             $ps->execute([
                 ':link' => $listing['link'],
                 ':lat' => isset($loc['lat']) ? $loc['lat'] : null,
                 ':lng' => isset($loc['lng']) ? $loc['lng'] : null,
                 ':street' => $streetText,
                 // Plain-text description: tidy the readable content, then strip the tags.
                 ':description' => $readability->init() ? trim(strip_tags(tidy_parse_string($readability->getContent()->innerHTML, [], 'UTF8'))) : null,
             ]);
         } catch (Exception $e) {
             Logger::error($e->getMessage(), $ps->errorinfo());
         }
     }
 }
예제 #27
0
 /**
  * Use the HTML tidy extension to use the tidy library in-process,
  * saving the overhead of spawning a new process.
  *
  * Uses the object API of the tidy extension bundled with PHP 5 and
  * later; the configuration is read from the file named by $wgTidyConf.
  *
  * @param string $text Markup to clean; parsed as UTF-8
  * @return string|null The cleaned markup, or null when Tidy reports a fatal error
  */
 private static function internal($text)
 {
     global $wgTidyConf;
     $fname = 'Parser::internalTidy';
     wfProfileIn($fname);

     $tidy = new tidy();
     $tidy->parseString($text, $wgTidyConf, 'utf8');
     $tidy->cleanRepair();

     if ($tidy->getStatus() == 2) {
         // 2 is magic number for fatal error
         // http://www.php.net/manual/en/function.tidy-get-status.php
         $cleansource = null;
     } else {
         $cleansource = tidy_get_output($tidy);
     }

     wfProfileOut($fname);
     return $cleansource;
 }
/**
 * Renders the stored page data with every node matched by an XPath
 * expression highlighted in red.
 *
 * The data text is normalised with Tidy when the extension is loaded,
 * otherwise purified with HTMLPurifier and wrapped in a minimal page.
 * A base element pointing at $base_url is inserted at the start of the
 * head so relative links resolve.
 *
 * @param object $url   Element exposing getElementsByTagName() (presumably a DOMElement)
 * @param string $xpath XPath expression selecting the nodes to highlight
 * @return string The resulting HTML, or a translated message when the data is empty
 */
function enlight_xpath($url, $xpath)
{
	global $smarty, $cookies,$base_url;
	static $purifier;
	static $loaded = false;

	$data = $url->getElementsByTagName('data')->item(0)->textContent;
	if (trim($data) == '') {
		return tra('The page is empty');
	}

	if (extension_loaded('tidy')) {
		$data = tidy_parse_string($data, array(), 'utf8');
		tidy_diagnose($data);
	} else {
		// Build the purifier once and reuse it across calls.
		if (!$loaded) {
			require_once('lib/htmlpurifier_tiki/HTMLPurifier.tiki.php');
			$config = getHTMLPurifierTikiConfig();
			$config->set('Attr.EnableID', true);
			$purifier = new HTMLPurifier($config);
			$loaded = true;
		}
		if ($purifier) {
			$data = '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body>'.$purifier->purify($data).'</body></html>';
			//$data = $purifier->purify($data);
		}
	}

	// loadHTML() is an instance method; calling it statically fails on
	// PHP 8. $data may be a tidy object here, hence the string cast.
	$dom_ref = new DOMDocument();
	$dom_ref->loadHTML((string) $data);
	$xp_ref = new DomXPath($dom_ref);

	// Only insert the base element when the document actually has a head.
	$head = $xp_ref->query('//head')->item(0);
	if ($head !== null) {
		$base = $dom_ref->createElement('base');
		$base->setAttribute('href', $base_url);
		$head->insertBefore($base, $head->firstChild);
	}

	foreach ($xp_ref->query($xpath) as $ref) {
		$ref->setAttribute('style', 'background-color: red;');
	}

	return $dom_ref->saveHTML();
}
예제 #29
0
/**
 * Rebuilds an HTML fragment from the body children of its Tidy-repaired form.
 *
 * The input is returned unchanged when tidy_repair_string() does not exist,
 * when the repair yields nothing, or when the repaired body has no child
 * nodes.
 *
 * @param string $html Markup to standardise; treated as UTF-8
 * @return string Concatenated values of the body's child nodes, or $html
 */
function html_standardization($html)
{
    if (!function_exists('tidy_repair_string')) {
        return $html;
    }

    $options = array('output-xhtml' => true);

    $repaired = tidy_repair_string($html, $options, 'utf8');
    if (!$repaired) {
        return $html;
    }

    $document = tidy_parse_string($repaired, $options, 'utf8');
    // Suppressed on purpose: the body lookup may not yield a node.
    $children = @tidy_get_body($document)->child;
    if (!is_array($children)) {
        return $html;
    }

    $pieces = array();
    foreach ($children as $child) {
        $pieces[] = $child->value;
    }

    return implode('', $pieces);
}
    /**
     * Cleans an HTML fragment and passes the result through rewriteShortcodes().
     *
     * Three strategies are tried in order: the tidy PHP extension, the
     * command-line tidy binary (detected by running "tidy --version"), and
     * finally SS_HTML4Value, whose content is returned without shortcode
     * rewriting.
     *
     * @param string $content   Markup to clean
     * @param bool   $stripWord Value used for Tidy's 'bare' and 'word-2000' options (extension only)
     * @return string
     */
    public function tidy($content, $stripWord = false)
    {
        // Try to use the extension first
        if (extension_loaded('tidy')) {
            $options = array(
                'clean' => true,
                'output-xhtml' => true,
                'show-body-only' => true,
                'quote-nbsp' => true,
                'wrap' => 0,
                'input-encoding' => 'utf8',
                'output-encoding' => 'utf8',
                'new-blocklevel-tags' => 'article aside audio details figcaption figure footer header hgroup nav section source summary temp track video',
                'new-empty-tags' => 'command embed keygen source track wbr',
                'new-inline-tags' => 'audio canvas command datalist embed keygen mark meter output progress time video wbr',
                'bare' => $stripWord,
                'word-2000' => $stripWord,
            );
            $document = tidy_parse_string($content, $options);
            $document->cleanRepair();

            return $this->rewriteShortcodes((string) $document);
        }

        // No PHP extension available, attempt to use CLI tidy.
        $exitCode = null;
        $versionOutput = null;
        @exec('tidy --version', $versionOutput, $exitCode);
        if ($exitCode === 0) {
            $input = escapeshellarg($content);
            // Doesn't work on Windows, sorry, stick to the extension.
            $cleaned = @shell_exec("echo $input | tidy -q --show-body-only yes --input-encoding utf8 --output-encoding utf8 --wrap 0 --clean yes --output-xhtml yes");

            return $this->rewriteShortcodes($cleaned);
        }

        // Fall back to default
        $fallback = new SS_HTML4Value($content);

        return $fallback->getContent();
    }