/**
 * Run the response body through HTML Tidy and overwrite it with the result.
 *
 * Only $body is touched: $headers and $uncached are accepted but never read.
 * The method contains no return statement, so callers receive null.
 *
 * @param array  $headers  Passed by reference, left unchanged
 * @param string $body     Markup to clean; replaced with Tidy's output
 * @param bool   $uncached Not used by this method
 * @return void
 */
public static function tidy(array &$headers, &$body, $uncached)
{
    $options = [
        'clean'           => 1,
        'bare'            => 1,
        'hide-comments'   => 1,
        'doctype'         => 'omit',
        'indent-spaces'   => 0,
        'tab-size'        => 0,
        'wrap'            => 0,
        'quote-ampersand' => 0,
        'output-xhtml'    => true,
        'quiet'           => 1,
    ];

    $document = new tidy();
    $document->parseString($body, $options, 'utf8');
    $document->cleanRepair();

    // Write the repaired markup back through the reference.
    $body = tidy_get_output($document);
}
/**
 * Normalise an HTML fragment, strip comments/CDATA/processing instructions,
 * and feed the result to the XML parser held in $this->parser.
 *
 * @param string $str HTML fragment
 * @return string Whatever is in $this->content once xml_parse() has run
 */
public function apply($str)
{
    $tidyAvailable = extension_loaded('tidy') && class_exists('tidy');

    if (!$tidyAvailable) {
        $str = $this->miniTidy($str);
    } else {
        $options = [
            'doctype'                     => 'strict',
            'drop-proprietary-attributes' => true,
            'drop-font-tags'              => true,
            'escape-cdata'                => true,
            'indent'                      => false,
            'join-classes'                => false,
            'join-styles'                 => true,
            'lower-literals'              => true,
            'output-xhtml'                => true,
            'show-body-only'              => true,
            'wrap'                        => 80,
        ];

        // A dummy paragraph is prepended before parsing and removed again
        // afterwards (the original author notes this "fixes a big issue").
        $document = new tidy();
        $document->parseString('<p>tt</p>' . $str, $options, 'utf8');
        $document->cleanRepair();

        $str = preg_replace('#^<p>tt</p>\\s?#', '', (string) $document);
    }

    # Remove complete comments first, then any comment opener left unterminated
    $str = preg_replace('%<!--.*?-->%msu', '', $str);
    $str = str_replace('<!--', '', $str);

    # Same two-step removal for CDATA sections
    $str = preg_replace('%<!\\[CDATA\\[.*?\\]\\]>%msu', '', $str);
    $str = str_replace('<![CDATA[', '', $str);

    # Transform processing instructions (the pairs are applied in order)
    $str = str_replace(['<?', '?>'], ['>?', '?<'], $str);

    $str = html::decodeEntities($str, true);

    // The parser callbacks are expected to fill $this->content; reset it first.
    $this->content = '';
    xml_parse($this->parser, '<all>' . $str . '</all>');

    return $this->content;
}
/**
 * Create and save a text content element from the item's tl_content value.
 *
 * The text is wrapped in a minimal HTML document, passed through nl2p() and
 * Tidy, then post-processed: images are stripped, bare URLs are turned into
 * links, <b> becomes <strong>, e-mail addresses become {{email::…}} insert
 * tags, and disallowed tags plus style/class/id attributes are removed.
 *
 * Fixes over the previous version:
 *  - URLs that already carry a scheme no longer get a second "http://"
 *    prepended to their href (only scheme-less "www." matches get one).
 *  - The <b> rewrite no longer matches other tags that merely start with
 *    "b" (e.g. <br>, <blockquote>).
 *
 * @param object $objItem Item providing tl_content and id; nothing happens
 *                        when tl_content is empty
 * @return void
 */
protected function createContentElements(&$objItem)
{
    if (!$objItem->tl_content) {
        return;
    }

    // need to wrap <p> around text for contao
    $tidyConfig = array(
        'enclose-text'                => true,
        'drop-font-tags'              => true,
        'drop-proprietary-attributes' => true,
        'quote-ampersand'             => true,
        'clean'                       => false,
    );

    $bodyText = '<!DOCTYPE html><head><title></title></head><body>' . $objItem->tl_content . '</body></html>';
    // $bodyText = $this->convert_external_link_tags($bodyText);
    // $bodyText = $this->convert_internal_link_tags($bodyText);
    $bodyText = $this->nl2p($bodyText);

    $tidy = new \tidy();
    $tidy->parseString($bodyText, $tidyConfig, $GLOBALS['TL_CONFIG']['dbCharset']);
    $body = $tidy->body();

    $text = trim(str_replace(array('<body>', '</body>'), '', $body));

    // strip images
    $text = preg_replace("/<img[^>]+\\>/i", "", $text);

    // create links from text; add a scheme to the href only when the match has none
    $text = preg_replace_callback(
        '!(\\s|^)((https?://|www\\.)+[a-z0-9_./?=&-]+)!i',
        function ($match) {
            $url  = $match[2];
            $href = preg_match('!^https?://!i', $url) ? $url : 'http://' . $url;

            return ' <a href="' . $href . '" target="_blank">' . $url . '</a>';
        },
        $text
    );

    // replace <b> by <strong>; the tag name must end right after the "b"
    $text = preg_replace('!<b(\\s[^>]*)?>(.*?)</b>!i', '<strong>$2</strong>', $text);

    // replace emails with inserttags
    $text = preg_replace(
        '/([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\\.([A-Z]{2,4})(\\((.+?)\\))?/i',
        '{{email::$1@$2.$3}}',
        $text
    );

    // strip not allowed tags
    $text = strip_tags($text, \Config::get('allowedTags'));
    $text = $this->stripAttributes($text, array('style', 'class', 'id'));

    $objContent = new \ContentModel();
    $objContent->text    = $text;
    $objContent->ptable  = static::$strTable;
    $objContent->pid     = $objItem->id;
    $objContent->sorting = 16;
    $objContent->tstamp  = time();
    $objContent->type    = 'text';
    $objContent->save();
}
/**
 * Fetch a page 21 times, extract each response's <body> content via Tidy,
 * and save the distinct results (keyed by a title derived from the text).
 *
 * Changes over the previous version:
 *  - $uri was read but never assigned; it is now an optional parameter, so
 *    calling sphsc3() without arguments still passes null to get_page() but
 *    no longer touches an undefined variable.
 *  - Responses without a <body> element are skipped instead of reading an
 *    undefined match offset and storing an empty item.
 *  - Loop-invariant setup (Tidy options, $tids) is hoisted out of the loop.
 *
 * NOTE(review): get_page(), some_words() and save_item() are defined
 * elsewhere; their contracts are not visible here.
 *
 * @param string|null $uri Page to fetch
 * @return void
 */
function sphsc3($uri = null)
{
    // Candidate pages noted by the original author (with their own numbering):
    // 'https://content.sphsc.washington.edu/sphintra/web2/factoidsaud.asp'; //7-8
    // 'https://content.sphsc.washington.edu/sphintra/web2/res_blurb.asp'; // 11
    // 'https://content.sphsc.washington.edu/sphintra/web2/factoidscore.asp'; // 14
    // 'https://content.sphsc.washington.edu/sphintra/web2/factoidsphd.asp'; //15
    // 'https://content.sphsc.washington.edu/sphintra/web2/factoidspb.asp'; // 16, 19
    // 'https://content.sphsc.washington.edu/sphintra/web2/factoidsmed.asp'; // 17, 20
    // 'https://content.sphsc.washington.edu/sphintra/web2/factoidsug.asp'; // 18
    // 'https://content.sphsc.washington.edu/sphintra/web2/res_stone_blurb.asp';
    // 'https://content.sphsc.washington.edu/sphintra/web2/factoidscore.asp'; // 19
    // No's:
    // http://content.sphsc.washington.edu/sphintra/web2/clinic_msg.asp
    // http://content.sphsc.washington.edu/sphintra/web2/home_right.asp
    // https://content.sphsc.washington.edu/sphintra/web2/res_stone_blurb.asp
    // http://content.sphsc.washington.edu/sphintra/web2/outreach_msg.asp

    $tidyOptions = array('indent' => true, 'output-xhtml' => true, 'wrap' => 200, 'hide-comments' => true);
    $tids        = array();
    $results     = array();

    // Same iteration count as the former do/while ($i++ < 20): 21 fetches.
    for ($i = 0; $i <= 20; $i++) {
        $response = get_page($uri);

        $tidy = new tidy();
        $tidy->parseString($response, $tidyOptions, 'utf8');
        $tidy->cleanRepair();

        if (!preg_match("/<body[^>]*>(.*?)<\\/body>/is", (string) $tidy, $match)) {
            // Nothing usable in this response.
            continue;
        }

        $body  = str_replace(array("\n", "\r"), '', $match[1]);
        $title = some_words(strip_tags($body));

        // Keyed by title, so identical responses collapse into one entry.
        $results[$title] = $body;
    }

    foreach ($results as $title => $body) {
        save_item($title, $body, $tids);
    }
}
/**
 * Parse self::$html with Tidy (using self::$tidy_config and self::$encoding),
 * repair it, and store the result back in self::$html.
 *
 * Note that the tidy object itself is stored, not a string copy of it.
 */
function tidyClean()
{
    $document = new tidy();
    $document->parseString(self::$html, self::$tidy_config, self::$encoding);
    $document->cleanRepair();

    self::$html = $document;
}
/**
 * Detects feed types and instantiate appropriate objects.
 *
 * Our constructor takes care of detecting feed types and instantiating
 * appropriate classes. For now we're going to treat Atom 0.3 as Atom 1.0
 * but raise a warning. I do not intend to introduce full support for
 * Atom 0.3 as it has been deprecated, but others are welcome to.
 *
 * If the feed does not load as XML and $tidy is requested (and the tidy
 * extension is available), the input is repaired with Tidy and loaded again.
 * The retry now uses the same libxml options as the first attempt, so
 * $suppressWarnings is honoured on both paths.
 *
 * @param DOMDocument $model            Document the feed is loaded into
 * @param string      $feed             XML serialization of the feed
 * @param bool        $strict           Whether or not to validate the feed
 * @param bool        $suppressWarnings Trigger errors for deprecated feed types?
 * @param bool        $tidy             Whether or not to try and use the tidy library on input
 *
 * @return object Instance of the class chosen by determineClass()
 * @throws XML_Feed_Parser_Exception When the input is empty or is not valid XML
 */
function build(DOMDocument $model, $feed, $strict = false, $suppressWarnings = false, $tidy = false)
{
    $options = 0;
    if ($suppressWarnings) {
        $options |= LIBXML_NOWARNING | LIBXML_NOERROR;
    }

    if (empty($feed)) {
        throw new XML_Feed_Parser_Exception('Invalid input: file is empty');
    }

    if (!$model->loadXML($feed, $options)) {
        if (!($tidy && extension_loaded('tidy'))) {
            throw new XML_Feed_Parser_Exception('Invalid input: this is not valid XML');
        }

        // Second chance: let Tidy repair the markup as XML, then reload it.
        $repaired = new tidy();
        $repaired->parseString($feed, array('input-xml' => true, 'output-xml' => true));
        $repaired->cleanRepair();

        if (!$model->loadXML((string) $repaired, $options)) {
            throw new XML_Feed_Parser_Exception('Invalid input: this is not valid XML');
        }
    }

    /* detect feed type */
    $doc_element = $model->documentElement;
    $class = $this->determineClass($doc_element, $suppressWarnings);

    /* Instantiate feed object */
    $feed = new $class($model, $strict);
    $feed->setSanitizer(new XML_Feed_Parser_Unsafe_Sanitizer());

    return $feed;
}
/**
 * Repair a piece of markup with Tidy using the class-level configuration.
 *
 * @param string $markup Markup to repair (parsed as utf8)
 * @return string Tidy's output
 */
public function repair($markup)
{
    $document = new \tidy();
    $document->parseString($markup, self::$config, 'utf8');
    $document->cleanRepair();

    return (string) $document;
}
/**
 * Mail an instance-utilization report, cleaned up with Tidy, to Bcc recipients.
 *
 * With $game_cfg === null a summary is sent as a multipart/mixed message:
 * the HTML report plus a CSV produced by ReportCollector::generateCSV().
 * Otherwise a plain HTML mail is sent for the given game.
 *
 * Fix: the multipart header block lacked the line break between the
 * Content-Type header and X-Mailer, so "X-Mailer: …" was glued onto the
 * boundary value and the boundary announced in the headers never matched the
 * one used in the body.
 *
 * @param array      $server_cfg Reads "sender" and, for the summary,
 *                               "instance_report_mail_recipients"
 * @param array|null $game_cfg   Reads "name" and "instance_report_mail_recipients";
 *                               null selects the summary mail
 * @param string     $report     Path of the HTML report file
 * @return void
 */
private static function send_mail($server_cfg, $game_cfg, $report)
{
    $to = "";

    // Both variants send the same Tidy-repaired report markup.
    $tidy = new tidy();
    $tidy->parseString(file_get_contents($report));
    $tidy->cleanRepair();
    $html = (string) $tidy;

    if ($game_cfg === null) {
        $random_hash   = md5(date('r', time()));
        $mime_boundary = "==Multipart_Boundary_x{$random_hash}x";

        $subject = "Instance Utilization Summary";
        $bcc     = $server_cfg["instance_report_mail_recipients"];
        $headers = 'From: ' . $server_cfg["sender"] . "\r\n"
            . 'Bcc: ' . $bcc . "\r\n"
            . "Content-Type: multipart/mixed;" . " boundary=\"{$mime_boundary}\"" . "\r\n"
            . 'X-Mailer: PHP/' . phpversion();

        // Part 1: the HTML report
        $message = "\n\n"
            . "--{$mime_boundary}\n"
            . "Content-Type:text/html; charset=\"iso-8859-1\"\n"
            . "Content-Transfer-Encoding: 7bit\n\n"
            . $html . "\n\n";

        // Part 2: the CSV attachment, followed by the closing boundary
        $reportClass = new ReportCollector($server_cfg);
        $data        = $reportClass->generateCSV();
        $message .= "--{$mime_boundary}\n"
            . "Content-Type:text/csv; \n"
            . " name=zPerfmonUtilTrend_" . date("MjY") . ".csv \n"
            . "Content-Transfer-Encoding: 7bit\n\n"
            . $data . "\n\n"
            . "--{$mime_boundary}--\n";

        mail($to, $subject, $message, $headers);

        return;
    }

    $subject = "Instance Utilization report for {$game_cfg['name']}";
    $bcc     = $game_cfg["instance_report_mail_recipients"];
    $headers = 'From: ' . $server_cfg["sender"] . "\r\n"
        . 'Bcc: ' . $bcc . "\r\n"
        . 'Content-Type: text/HTML' . "\r\n"
        . 'X-Mailer: PHP/' . phpversion();

    mail($to, $subject, $html, $headers);
}
/**
 * Use the HTML tidy extension to use the tidy library in-process,
 * saving the overhead of spawning a new process.
 *
 * Fix: when the Tidy report is appended as an HTML comment, any "-->" inside
 * the report is now rewritten to "--&gt;". The previous replacement swapped
 * "-->" for itself, so a report containing "-->" closed the comment early.
 *
 * @param string $text HTML to check
 * @param bool $stderr Whether to read result from error status instead of output
 * @param int &$retval Exit code (-1 on internal error)
 * @return string|null Cleaned source (or the error buffer when $stderr is set);
 *   null when the tidy class is missing or Tidy reports status 2
 */
protected function cleanWrapped($text, $stderr = false, &$retval = null)
{
    if (!class_exists('tidy')) {
        wfWarn("Unable to load internal tidy class.");
        $retval = -1;

        return null;
    }

    $tidy = new \tidy();
    $tidy->parseString($text, $this->config['tidyConfigFile'], 'utf8');

    if ($stderr) {
        // Caller wants the diagnostics, not the cleaned markup.
        $retval = $tidy->getStatus();

        return $tidy->errorBuffer;
    }

    $tidy->cleanRepair();
    $retval = $tidy->getStatus();

    if ($retval == 2) {
        // 2 is magic number for fatal error
        // http://www.php.net/manual/en/function.tidy-get-status.php
        return null;
    }

    $cleansource = tidy_get_output($tidy);

    if (!empty($this->config['debugComment']) && $retval > 0) {
        $cleansource .= "<!--\nTidy reports:\n"
            . str_replace('-->', '--&gt;', $tidy->errorBuffer)
            . "\n-->";
    }

    return $cleansource;
}
/**
 * Convert the content to XHTML with Tidy (PHP extension, else the tidy CLI)
 * and load it into $this->simpleXML.
 *
 * After tidying, the XHTML namespace declaration and non-breaking spaces are
 * removed. Fix: the non-breaking-space removal now targets the "&#160;"
 * entity that Tidy emits with numeric entities enabled (and the raw UTF-8
 * NBSP character), instead of a literal that did not match that entity.
 *
 * @param string $content HTML to parse
 * @throws Exception When the resulting markup cannot be loaded by SimpleXML
 */
public function __construct($content)
{
    // Removed from Tidy's output before handing it to SimpleXML.
    $strip = array('xmlns="http://www.w3.org/1999/xhtml"', '&#160;', "\xC2\xA0");

    if (extension_loaded('tidy')) {
        // using the tidy php extension
        $tidy = new tidy();
        $tidy->parseString(
            $content,
            array('output-xhtml' => true, 'numeric-entities' => true, 'wrap' => 0),
            'utf8'
        );
        $tidy->cleanRepair();
        $tidy = str_replace($strip, '', (string) $tidy);
    } elseif (@shell_exec('which tidy')) {
        // using tidy through cli
        $CLI_content = escapeshellarg($content);
        $tidy = `echo {$CLI_content} | tidy --force-output 1 -n -q -utf8 -asxhtml -w 0 2> /dev/null`;
        $tidy = str_replace($strip, '', $tidy);
    } else {
        // no tidy library found, hence no sanitizing
        $tidy = $content;
    }

    $this->simpleXML = @simplexml_load_string($tidy, 'SimpleXMLElement', LIBXML_NOWARNING);
    if (!$this->simpleXML) {
        throw new Exception('CSSContentParser::__construct(): Could not parse content.' . ' Please check the PHP extension tidy is installed.');
    }

    parent::__construct();
}
/**
 *
 * Uses the tidy library to tidy HTML output.
 *
 * Fix: the tidy class has no get_output() method; the cleaned markup is now
 * read with the procedural tidy_get_output() function.
 *
 * @access public
 *
 * @param string $buffer The source text to be filtered.
 *
 * @return string The repaired markup as produced by Tidy.
 *
 */
public function filter($buffer)
{
    $config = array('indent' => true, 'output-xhtml' => true, 'wrap' => 200);

    $tidy = new tidy();
    $tidy->parseString($buffer, $config);
    $tidy->cleanRepair();

    return tidy_get_output($tidy);
}
/**
 * Clean $content in place: wrap it via gethtml(), repair it with Tidy as
 * XHTML, and reduce the result again via getbody().
 *
 * @param string $content Markup to clean; overwritten with the result
 * @return void
 */
public function filter(&$content)
{
    $options = [
        'clean'              => true,
        'enclose-block-text' => true,
        'enclose-text'       => true,
        'preserve-entities'  => true,
        'logical-emphasis'   => true,
        'char-encoding'      => 'utf8',
        'indent'             => 'auto',
        'output-xhtml'       => true,
        'wrap'               => 200,
    ];

    $document = new tidy();
    $document->parseString($this->gethtml($content), $options, 'utf8');
    $document->cleanRepair();

    $repaired = (string) $document;
    $content  = $this->getbody($repaired);
}
/**
 * Parse and repair HTML as strict XHTML, wrapped at 120 columns.
 *
 * @param string $html Markup to repair (parsed as utf8)
 * @return tidy The tidy document; cast it to string to get the markup
 */
function tidy_html($html)
{
    $options = [
        'indent'       => 2,
        'output-xhtml' => true,
        'doctype'      => 'strict',
        'wrap'         => 120,
    ];

    $document = new tidy();
    $document->parseString($html, $options, 'utf8');
    $document->cleanRepair();

    return $document;
}
/**
 * Clean HTML: first through Html::BurnerClean(), then through Tidy
 * (body only, XHTML output, default alt text for images without one).
 *
 * @param string $html String containing the HTML to clean
 * @return tidy The tidy document holding the cleaned markup; it converts to
 *              the cleaned string when used in string context
 */
public static function Clean($html)
{
    $prepared = Html::BurnerClean($html);

    $options = [
        "show-body-only" => true,
        "alt-text"       => "Pic without description",
        "hide-endtags"   => false,
        "output-xhtml"   => true,
    ];

    $document = new tidy();
    $document->parseString($prepared, $options, 'utf8');
    $document->cleanRepair();

    return $document;
}
/**
 * Post-filter hook: repair the filtered text with Tidy when possible.
 *
 * The text is returned untouched when the tidy extension is unavailable,
 * when a size limit is configured and the text exceeds it, or when Tidy
 * fails to parse the input. For charsets other than us-ascii the text is
 * converted to UTF-8 for Tidy and converted back afterwards.
 *
 * @param string $text The text after the filtering.
 *
 * @return string The modified text.
 */
public function postProcess($text)
{
    if (!Horde_Util::extensionExists('tidy')) {
        return $text;
    }
    if ($this->_params['size'] !== false && strlen($text) > $this->_params['size']) {
        return $text;
    }

    $options = [
        'enclose-block-text' => true,
        'hide-comments'      => true,
        'indent'             => false,
        'numeric-entities'   => true,
        'preserve-entities'  => true,
        'show-body-only'     => !empty($this->_params['body_only']),
        'tab-size'           => 0,
        'wrap'               => 0,
    ];

    $document = new tidy();

    if (strtolower($this->_params['charset']) == 'us-ascii') {
        // Plain ASCII can be handed to Tidy as is.
        if (!$document->parseString($text, $options, 'ascii')) {
            return $text;
        }
        $document->cleanRepair();

        return $document->value;
    }

    // Everything else takes a round trip through UTF-8.
    $utf8 = Horde_String::convertCharset($text, $this->_params['charset'], 'UTF-8');
    if (!$document->parseString($utf8, $options, 'utf8')) {
        return $text;
    }
    $document->cleanRepair();

    return Horde_String::convertCharset($document->value, 'UTF-8', $this->_params['charset']);
}
/**
 * Repair a response body with Tidy and return it as XHTML.
 *
 * Non-breaking spaces are turned into ordinary spaces before parsing. The
 * previous replacement used search and replacement literals that are
 * indistinguishable from each other in this source, which makes it a no-op
 * if both are plain spaces; the search now names the "&nbsp;" entity and
 * the raw UTF-8 NBSP character explicitly.
 *
 * @param string $response Markup to repair (parsed as utf8)
 * @return string Repaired XHTML
 */
protected function tidy($response)
{
    $response = str_replace(array('&nbsp;', "\xC2\xA0"), ' ', $response);

    $config = array('output-xhtml' => true);

    $tidy = new tidy();
    $tidy->parseString($response, $config, 'utf8');
    $tidy->cleanRepair();

    return (string) $tidy;
}
/**
 * Repair a fixed placeholder string with Tidy using $this->tidyconfig.
 *
 * $url is not read: the input is a hard-coded sample (the original author
 * notes that the real code downloads it from a site).
 *
 * @param string $url Unused
 * @return tidy The repaired tidy document
 */
public function getURL($url)
{
    // Stand-in for the downloaded page.
    $payload = "awerawer";

    $document = new tidy();
    $document->parseString($payload, $this->tidyconfig, 'utf8');
    $document->cleanRepair();

    return $document;
}
/**
 * Run content through Tidy and return the resulting markup.
 *
 * Options are layered: built-in defaults, then the caller's $options, then
 * a set of forced values that the caller cannot override.
 *
 * @param string $content Markup to process
 * @param array  $options Tidy options overriding the defaults
 * @return string The value of the repaired tidy document
 */
public static function minify($content, $options = array())
{
    $defaults = [
        'clean'             => false,
        'hide-comments'     => true,
        'wrap'              => 0,
        'input-encoding'    => 'utf8',
        'output-encoding'   => 'utf8',
        'preserve-entities' => true,
    ];

    // Merged last, so these always win.
    $forced = [
        'show-errors'   => 0,
        'show-warnings' => false,
        'force-output'  => true,
        'tidy-mark'     => false,
    ];

    $effective = array_merge($defaults, $options, $forced);

    $document = new tidy();
    $document->parseString($content, $effective);
    $document->cleanRepair();

    return $document->value;
}
/**
 * Repair an HTML fragment with Tidy (body only, no doctype, indented,
 * wrap set to 1) and hand back the tidy document.
 *
 * @param string $input Markup to repair (parsed as utf8)
 * @return tidy The repaired document; cast to string for the markup
 */
function tidyhtml($input)
{
    $options = [
        'show-errors'        => 0,
        'show-warnings'      => false,
        'break-before-br'    => true,
        'indent'             => true,
        'indent-attributes'  => true,
        'add-xml-decl'       => false,
        'force-output'       => true,
        'fix-backslash'      => false,
        'merge-divs'         => false,
        'merge-spans'        => false,
        'doctype'            => 'omit',
        'enclose-block-text' => true,
        'drop-empty-paras'   => false,
        'output-html'        => true,
        'show-body-only'     => true,
        'wrap'               => 1,
    ];

    $document = new tidy();
    $document->parseString($input, $options, 'utf8');
    $document->cleanRepair();

    return $document;
}
/**
 * Repair HTML with Tidy and return the serialized body node with the first
 * seven and the last seven characters cut off.
 *
 * NOTE(review): the fixed offsets presumably strip the surrounding
 * <body> … </body> wrapper — confirm against Tidy's actual node output.
 *
 * @param string $html Markup to repair (parsed as utf8)
 * @return string Trimmed serialization of the second child of <html>
 */
function tidyHtml($html)
{
    $options = [
        "indent"        => 2,
        "clean"         => false,
        "char-encoding" => "utf8",
    ];

    $document = new tidy();
    $document->parseString($html, $options, 'utf8');
    $document->cleanRepair();

    // child[1] of the <html> node is taken to be the body.
    $bodyMarkup = $document->html()->child[1]->value;

    return substr($bodyMarkup, 7, -7);
}
/**
 * Send a UTF-8 HTML content-type header, then repair $output with Tidy as
 * indented XHTML and return the tidy document.
 *
 * @param string $output Markup to repair
 * @return \tidy The repaired document
 */
public function preView($output)
{
    $tidyOptions = [
        'show-body-only'              => false,
        'clean'                       => true,
        'char-encoding'               => 'UTF8',
        'add-xml-decl'                => true,
        'add-xml-space'               => true,
        'output-html'                 => false,
        'output-xml'                  => false,
        'output-xhtml'                => true,
        'numeric-entities'            => false,
        'ascii-chars'                 => false,
        'doctype'                     => 'auto',
        'bare'                        => true,
        'fix-uri'                     => true,
        'indent'                      => true,
        'indent-spaces'               => 4,
        'tab-size'                    => 4,
        'wrap-attributes'             => true,
        'wrap'                        => 0,
        'indent-attributes'           => true,
        'join-classes'                => true,
        'join-styles'                 => false,
        'enclose-block-text'          => true,
        'fix-bad-comments'            => true,
        'fix-backslash'               => true,
        'replace-color'               => false,
        'wrap-asp'                    => false,
        'wrap-jste'                   => false,
        'wrap-php'                    => false,
        'write-back'                  => true,
        'drop-proprietary-attributes' => false,
        'hide-comments'               => true,
        'hide-endtags'                => false,
        'literal-attributes'          => false,
        'drop-empty-paras'            => true,
        'enclose-text'                => true,
        'quote-ampersand'             => true,
        'quote-marks'                 => false,
        'quote-nbsp'                  => true,
        'vertical-space'              => true,
        'wrap-script-literals'        => false,
        'tidy-mark'                   => true,
        'merge-divs'                  => false,
        'repeated-attributes'         => 'keep-last',
        'break-before-br'             => true,
    ];

    // The header goes out before any Tidy work is done.
    header('Content-type: text/html; charset=utf-8');

    $document = new \tidy();
    $document->parseString($output, $tidyOptions, 'UTF8');
    $document->cleanRepair();

    return $document;
}
/**
 * Repair text with Tidy's default options and return only the body portion.
 *
 * @param string $text Markup to repair (parsed as utf8)
 * @return string Serialized body node of the repaired document
 */
protected function _tidy($text)
{
    $document = new tidy();
    $document->parseString($text, [], 'utf8');
    $document->cleanRepair();

    // Only the body node is of interest.
    return tidy_get_body($document)->value;
}
/**
 * Convert an HTML page containing a table into an RSS 2.0 document.
 *
 * The page is repaired to XHTML with Tidy and loaded with SimpleXML. Every
 * row of the first table's first tbody becomes one <item>: the first line of
 * the first cell is the title, lines starting with "von"/"bis" go into
 * <valid>, the remaining cell text becomes the description, and the row's
 * data-lat/data-lon attributes are copied into <lat>/<lon>.
 *
 * @param string $data HTML source
 * @return string RSS document
 */
public function parse($data)
{
    $tidyOptions = ['indent' => true, 'output-xhtml' => true, 'wrap' => 200];

    $document = new tidy();
    $document->parseString($data, $tidyOptions, 'utf8');
    $document->cleanRepair();

    // Instantiated by the original code as well; the instance is not used below.
    $S = new HTMLSlicer();

    $XML = new SimpleXMLElement($document . "");

    $rss = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<rss version=\"2.0\">\n\t<channel>\n\t\t<title>Notdienste</title>\n\t\t<link>notdienst-portal.de</link>\n\t\t<description>Notdienste</description>\n\t\t<language>de-de</language>\n\t\t<copyright>Bayerische Landesapothekerkammer</copyright>\n\t\t<pubDate>" . date("r") . "</pubDate>\n\n";

    $latAttr = "data-lat";
    $lonAttr = "data-lon";

    foreach ($XML->body->table[0]->tbody[0]->tr as $row) {
        $von     = "";
        $bis     = "";
        $title   = "";
        $content = "";

        $cellIndex = 0;
        foreach ($row->td as $cell) {
            // Plain text of the cell without the "Entfernung" label.
            $cellText = trim(strip_tags($cell->asXML()));
            $cellText = trim(str_replace("Entfernung", "", $cellText));

            $lines = array_map('trim', explode("\n", $cellText));

            if ($cellIndex == 0) {
                // First line of the first cell is the item title.
                $title = trim($lines[0]);
                unset($lines[0]);
            }

            // Pull the validity lines out of the description text.
            foreach ($lines as $key => $line) {
                if (stripos($line, "von") === 0) {
                    $von = $line;
                    unset($lines[$key]);
                }
                if (stripos($line, "bis") === 0) {
                    $bis = $line;
                    unset($lines[$key]);
                }
            }

            $content .= implode("\n", $lines) . "\n";
            $cellIndex++;
        }

        $content = nl2br(trim($content));

        $rss .= "\n\t\t<item>\n\t\t\t<title><![CDATA[" . html_entity_decode($title)
            . "]]></title>\n\t\t\t<description><![CDATA[" . preg_replace('/ +/', ' ', html_entity_decode($content))
            . "]]></description>\n\t\t\t<link></link>\n\t\t\t<author></author>\n\t\t\t<guid></guid>\n\t\t\t<valid>{$von} {$bis}</valid>\n\t\t\t<lat>" . $row->attributes()->{$latAttr}
            . "</lat>\n\t\t\t<lon>" . $row->attributes()->{$lonAttr}
            . "</lon>\n\t\t\t<pubDate>" . date("r") . "</pubDate>\n\t\t</item>";
    }

    $rss .= "\n\t</channel>\n</rss>";

    return $rss;
}
/**
 * Repair HTML into XML with Tidy and load the <html> node into SimpleXML.
 *
 * Comments are dropped and entities are emitted in numeric form.
 *
 * @param string $html Markup to convert (parsed as utf8)
 * @return SimpleXMLElement Root element built from Tidy's html node
 */
function GetXML($html)
{
    $options = [
        'output-xml'       => true,
        'numeric-entities' => true,
        'hide-comments'    => true,
    ];

    $document = new tidy();
    $document->parseString($html, $options, 'utf8');
    $document->cleanRepair();

    // The html node is handed to SimpleXML directly, as in the original.
    $htmlNode = $document->html();

    return new SimpleXMLElement($htmlNode);
}
/**
 * Intended to clean HTML with Tidy; currently returns the input unchanged.
 *
 * The Tidy path is switched off by the hard-coded "&& false" in the
 * condition, so the code after the guard never runs. $tags is not used.
 *
 * @param string     $html Markup to purify
 * @param mixed|null $tags Unused
 * @return string The input markup (the disabled path would return the part
 *                selected by getStringBetween() from Tidy's output)
 */
private function purifyHtml($html, $tags = null)
{
    $useTidy = class_exists('Tidy') && false;

    if (!$useTidy) {
        return $html;
    }

    $options = ['output-xhtml' => true, 'indent' => false];

    $document = new tidy();
    $document->parseString($html, $options, 'utf8');

    return $this->getStringBetween((string) $document, '<body>');
}
/**
 * Render the stored HTML value as plain text.
 *
 * Entities are decoded using the instance charset, the markup is parsed
 * with Tidy, and the body node is walked by nodeToPlainText(), which
 * appends to the result string.
 *
 * @param Io_Charset|null $outputCharset_ Charset for Tidy's output; the
 *                                        instance charset when null
 * @return string
 */
public function toPlainText(Io_Charset $outputCharset_ = null)
{
    $targetCharset = (null === $outputCharset_) ? $this->m_charset : $outputCharset_;

    $decoded = html_entity_decode($this->m_value, ENT_QUOTES, $this->m_charset->name());

    $encodingOptions = [
        'char-encoding'   => $this->m_charset->name(),
        'input-encoding'  => $this->m_charset->name(),
        'output-encoding' => $targetCharset->name(),
    ];

    $document = new \tidy();
    $document->parseString($decoded, $encodingOptions);

    // Filled in by nodeToPlainText() through its second argument.
    $plain = '';
    $this->nodeToPlainText($document->body(), $plain);

    return $plain;
}
/**
 * Mail a game's performance report, repaired with Tidy, as an HTML message.
 *
 * The message has an empty "To" and is delivered via Bcc to the server-level
 * and game-level recipient lists joined with a comma.
 *
 * @param array  $server_cfg Reads "mail_recipients"
 * @param array  $game_cfg   Reads "mail_recipients" and "name"
 * @param string $report     Path of the HTML report file
 * @return void
 */
function send_mail($server_cfg, $game_cfg, $report)
{
    $recipient  = "";
    $blindCopy  = $server_cfg["mail_recipients"] . "," . $game_cfg["mail_recipients"];
    $mailTitle  = "Performance report for {$game_cfg['name']}";

    $headerLines = [
        'From: noreply@xxxx.xxx',
        'Bcc: ' . $blindCopy,
        'Content-Type: text/HTML',
        'X-Mailer: PHP/' . phpversion(),
    ];
    $mailHeaders = implode("\r\n", $headerLines);

    $reportHtml = file_get_contents($report);

    $document = new tidy();
    $document->parseString($reportHtml);
    $document->cleanRepair();

    // The tidy document is passed as the body and converted to its markup.
    mail($recipient, $mailTitle, $document, $mailHeaders);
}
/**
 * Build the article teaser: the content, shortened to the configured length
 * and repaired with Tidy, followed by a "Continue reading" link.
 *
 * The limit comes from Setting #1; 0 means "do not shorten".
 *
 * Fix: length check and cut now work on UTF-8 characters (mb_strlen /
 * mb_substr) instead of bytes, so a multibyte character can no longer be
 * split in half before the text is handed to Tidy as utf8. The limit is
 * therefore counted in characters.
 *
 * @return string Repaired teaser markup plus the link to the full article
 */
public function getAdjustedArticleContent()
{
    $limit = (int) Setting::find(1)->value;
    $text  = $this->content;

    if ($limit != 0 && mb_strlen($text, 'UTF-8') > $limit) {
        $text = mb_substr($text, 0, $limit, 'UTF-8') . '...';
    }

    // Cutting may leave tags open; Tidy closes them again.
    $tidy = new tidy();
    $tidy->parseString($text, array('show-body-only' => true), 'utf8');
    $tidy->cleanRepair();

    return $tidy . '<a href="' . $this->getArticleUrl() . '">[ Continue reading ]</a>';
}
/**
 * Send content
 *
 * Echoes the response content after parsing it with Tidy (using
 * $this->config), unless the event reports the content as already sent.
 * Tidy's cleanRepair() step is intentionally not run here (it is commented
 * out in the original code).
 *
 * @param SendResponseEvent $event
 * @return $this
 */
public function sendContent(SendResponseEvent $event)
{
    if (!$event->contentSent()) {
        $response = $event->getResponse();

        $document = new \tidy();
        $document->parseString($response->getContent(), $this->config);
        //$document->cleanRepair();

        echo $document;

        $event->setContentSent();
    }

    return $this;
}
/**
 * Download a page with cURL and return it repaired by Tidy as XML.
 *
 * Redirects are followed and a browser User-Agent is sent. In the result,
 * non-breaking spaces are replaced with ordinary spaces. The previous
 * replacement used search and replacement literals that are
 * indistinguishable from each other in this source, which makes it a no-op
 * if both are plain spaces; the search now names the "&nbsp;" entity and
 * the raw UTF-8 NBSP character explicitly.
 *
 * NOTE(review): a failed download (curl_exec() returning false) is still
 * passed to Tidy unchecked, as before.
 *
 * @param string $url Address of the page to fetch
 * @return string Repaired XML markup
 */
function getTidy($url)
{
    // $curl = curl_init('http://cgi.w3.org/cgi-bin/tidy?docAddr='.urlencode($url).'&forceXML=on');
    $curl = curl_init($url);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.0.4) Gecko/20060508 Firefox/2.0');
    $rtrn = curl_exec($curl);
    curl_close($curl);

    $tidy = new tidy();
    $tidy->parseString(
        $rtrn,
        array('output-xml' => true, 'doctype' => 'loose', 'add-xml-decl' => true),
        'utf8'
    );
    $tidy->cleanRepair();

    return str_replace(array('&nbsp;', "\xC2\xA0"), ' ', (string) $tidy);
}