/**
 * Repair HTML. Uses the Tidy extension when it is loaded and allowed,
 * otherwise falls back to a simple regex-based tag balancer.
 *
 * The fallback appends a closing tag for every non-void opening tag that has
 * no matching closing tag in the input, innermost (last opened) first. It
 * does not reorder or remove anything already present in the input.
 *
 * @param string  $html     The HTML string to repair.
 * @param boolean $use_tidy Use Tidy when the extension is available.
 *
 * @return string Repaired HTML.
 */
public static function repair($html, $use_tidy = true)
{
    if ($use_tidy && function_exists('tidy_repair_string')) {
        $tidyConfig = array('indent' => true, 'output-xhtml' => true, 'show-body-only' => true, 'wrap' => false);

        return tidy_repair_string($html, $tidyConfig, 'utf8');
    }

    // Elements that never take a closing tag.
    $voidTags = array(
        'meta', 'img', 'br', 'link', 'area', 'hr', 'input', 'col',
        'base', 'embed', 'param', 'source', 'track', 'wbr',
    );

    // Opening tags: names may contain digits (h1..h6), attributes may span
    // lines, and self-closed tags ("<br />", "<br/>") are not counted.
    preg_match_all('#<([a-z][a-z0-9]*)(?:\s[^>]*)?(?<!/)>#i', $html, $openMatches);
    // Closing tags.
    preg_match_all('#</([a-z][a-z0-9]*)\s*>#i', $html, $closeMatches);

    // Compare names case-insensitively, since the patterns match that way.
    $pendingClosers = array_map('strtolower', $closeMatches[1]);

    foreach (array_reverse($openMatches[1]) as $tag) {
        $lowerTag = strtolower($tag);

        if (in_array($lowerTag, $voidTags, true)) {
            continue;
        }

        $position = array_search($lowerTag, $pendingClosers, true);
        if ($position === false) {
            // No closing tag left for this element: append one, keeping the
            // spelling used by the opening tag.
            $html .= '</' . $tag . '>';
        } else {
            // Consume the closing tag so it is not matched twice.
            unset($pendingClosers[$position]);
        }
    }

    return $html;
}
/**
 * Closes unclosed HTML tags in a fragment.
 *
 * When the Tidy extension is loaded the fragment is handed to
 * tidy_repair_string(). Otherwise a regex-based fallback appends a closing
 * tag for every non-void opening tag that has no matching closing tag,
 * innermost (last opened) first.
 *
 * @param string $content HTML to repair.
 *
 * @return string Repaired HTML.
 */
function autoclose_tags_custom($content = '')
{
    if (function_exists('tidy_repair_string')) {
        $tidyConfig = array(
            'clean' => true,
            'drop-font-tags' => true,
            'drop-proprietary-attributes' => true,
            'enclose-text' => true,
        );

        return tidy_repair_string($content, $tidyConfig, 'utf8');
    }

    // Elements that never take a closing tag.
    $voidTags = array(
        'img', 'br', 'hr', 'input', 'col', 'meta', 'link',
        'area', 'base', 'embed', 'param', 'source', 'track', 'wbr',
    );

    // Opening tags: names may contain digits (h1..h6), attributes may span
    // lines, and self-closed tags ("<br />", "<br/>") are not counted.
    preg_match_all('#<([a-z][a-z0-9]*)(?:\s[^>]*)?(?<!/)>#i', $content, $openMatches);
    // Closing tags.
    preg_match_all('#</([a-z][a-z0-9]*)\s*>#i', $content, $closeMatches);

    // Compare names case-insensitively, since the patterns match that way.
    $pendingClosers = array_map('strtolower', $closeMatches[1]);

    foreach (array_reverse($openMatches[1]) as $tag) {
        $lowerTag = strtolower($tag);

        if (in_array($lowerTag, $voidTags, true)) {
            continue;
        }

        $position = array_search($lowerTag, $pendingClosers, true);
        if ($position === false) {
            // No closing tag left for this element: append one.
            $content .= '</' . $tag . '>';
        } else {
            // Consume the closing tag so it is not matched twice.
            unset($pendingClosers[$position]);
        }
    }

    return $content;
}
/**
 * Builds the final page from the buffered output: appends the footer (unless
 * $noFooter is set), fills in the <title> placeholder, optionally runs
 * OptimizeLayouts(), closes the database link and, when $overallTidy is set,
 * passes the page through Tidy.
 *
 * @param string $buffer Page output captured so far.
 *
 * @return string The finished page markup.
 */
function DoFooter($buffer)
{
    global $noFooter, $timeStart, $queries, $overallTidy, $boardname, $title, $dblink, $ajax, $footerButtons, $footerExtensionsA, $footerExtensionsB;

    // Always defined, so the concatenation below is safe when the footer is
    // suppressed.
    $footer = '';
    if (!$noFooter) {
        $footer = format("\n\t\t<div class=\"footer\">\n\t\t\tPowered by <a href=\"https://github.com/Dirbaio/ABXD\">AcmlmBoard XD</a>, version 2.2.6<br />\n\t\t\tBy Kawa, Mega-Mario, Nikolaj, et al<br />\n\t\t\tAcmlmBoard © Jean-François Lapointe<br />\n\t\t\t" . __("Page rendered in {0} seconds with {1}.") . "<br />\n\t\t\t{3}\n\n\t\t\t<a href=\"http://validator.w3.org/check?uri=referer\">\n\t\t\t\t<img src=\"img/xhtml10.png\" alt=\"Valid XHTML 1.0 Transitional\" />\n\t\t\t</a>\n\t\t\t<a href=\"http://jigsaw.w3.org/css-validator/\">\n\t\t\t\t<img src=\"img/css.png\" alt=\"Valid CSS!\" />\n\t\t\t</a>\n\t\t\t<a href=\"http://abxd.dirbaio.net/?page=downloads\">\n\t\t\t\t<img src=\"img/getabxd.png\" alt=\"Get a copy for yourself\" />\n\t\t\t</a>\n\t\t\t{2}\n\t\t</div>\n\t</div>\n</body>\n</html>\n", sprintf("%1.3f", usectime() - $timeStart), Plural($queries, __("MySQL query")), $footerButtons, __("<!-- English translation by Kawa -->"));
    }

    $boardTitle = htmlval($boardname);
    if ($title != "") {
        $boardTitle .= " » " . $title;
    }

    $raw = $buffer . $footerExtensionsA . $footer . $footerExtensionsB;
    $raw = str_replace("<title>[[BOARD TITLE HERE]]</title>", "<title>" . $boardTitle . "</title>", $raw);

    if (!$ajax) {
        $raw = OptimizeLayouts($raw);
    }

    // NOTE(review): the mysql_* extension was removed in PHP 7; kept because
    // $dblink is opened elsewhere with the same API.
    mysql_close($dblink);

    // Skip the Tidy pass when it is disabled or the extension is not loaded.
    if (!$overallTidy || !function_exists('tidy_repair_string')) {
        return $raw;
    }

    $tidyConfig = array(
        "show-body-only" => 0,
        "output-xhtml" => 1,
        "doctype" => "transitional",
        "logical-emphasis" => 1,
        "alt-text" => "",
        "drop-proprietary-attributes" => 1,
        "wrap" => 0,
        "preserve-entities" => 1,
        "indent" => 1,
        "input-encoding" => "utf8",
        "char-encoding" => "utf8",
        "output-encoding" => "utf8",
        "new-blocklevel-tags" => "video",
    );

    $clean = tidy_repair_string($raw, $tidyConfig);
    if ($clean === false) {
        // Tidy failed: serve the untidied page rather than an empty one.
        return $raw;
    }

    // Rewrites a class attribute starting with "required" into a
    // required="required" attribute followed by the remaining classes.
    $clean = str_replace("class=\"required", "required=\"required\" class=\"", $clean);

    // Strip carriage returns and the whitespace emitted directly before
    // closing tags whose name starts with "text" (e.g. </textarea>).
    $textareaFixed = str_replace("\r", "", $clean);
    $textareaFixed = str_replace(" </text", "</text", $textareaFixed);
    $textareaFixed = str_replace("\n</text", "</text", $textareaFixed);

    return $textareaFixed;
}
/**
 * Appends the Tidy-repaired data to the output response when both the
 * `enabled` and `userEnabled` flags are set, then flushes the response.
 */
public function flush()
{
    if ($this->enabled && $this->userEnabled) {
        $this->outputResponse->appendData(\tidy_repair_string($this->data, $this->config, 'utf8'));
    }

    $this->outputResponse->flush();

    // NOTE(review): the previous version ended with `$dataBuffer = '';`, an
    // assignment to an unused local that had no effect. It may have been
    // meant to reset $this->data after flushing — confirm with the class.
}
/**
 * Takes the contents of the current output buffer, runs them through Tidy
 * and writes the result into a freshly started output buffer.
 */
function __destruct2()
{
    $tidyOptions = array(
        'indent' => true,
        'output-xhtml' => false,
        'wrap' => false,
        'hide-comments' => true,
        'logical-emphasis' => true,
    );

    // Grab and discard the active buffer, then repair what was captured.
    $captured = ob_get_clean();
    $repaired = tidy_repair_string($captured, $tidyOptions, 'utf8');

    // Re-open buffering so the repaired markup is emitted through a buffer.
    ob_start();
    echo $repaired;
}
/**
 * Rewrites a generated HTML page in place using templates/page.html.
 *
 * Extracts title, content and prev/next links from the page, substitutes
 * them (plus the table of contents and editions markup) into the template,
 * optionally tidies the result, and writes it back to $file.
 *
 * Files whose name contains "-book-" are skipped.
 *
 * @param string $file     Path of the HTML file to rewrite.
 * @param string $toc      Table-of-contents markup; the link to this file is
 *                         marked with class="active".
 * @param string $editions Markup substituted for the {editions} placeholder.
 *
 * @return void
 */
function webifyFile($file, $toc, $editions)
{
    $filename = basename($file);

    // strpos() takes (haystack, needle) and may legitimately return 0, so
    // the result must be compared against false.
    if (strpos($filename, '-book-') !== false) {
        return;
    }

    $toc = str_replace('<a href="' . $filename . '">', '<a href="' . $filename . '" class="active">', $toc);
    $template = file_get_contents(dirname(__FILE__) . DIRECTORY_SEPARATOR . 'templates' . DIRECTORY_SEPARATOR . 'page.html');

    $buffer = file_get_contents($file);
    $title = getSubstring($buffer, '<title>', '</title>', FALSE, FALSE);
    $prev = '';

    if ($filename !== 'index.html') {
        // The wrapper div's class depends on the kind of page, which is
        // derived from the file name prefix.
        if (strpos($filename, 'appendixes') === 0) {
            $type = 'appendix';
        } elseif (strpos($filename, 'preface') === 0) {
            $type = 'preface';
        } elseif (strpos($filename, 'pt') === 0) {
            $type = 'part';
        } else {
            $type = 'chapter';
        }

        $content = getSubstring($buffer, '<div class="' . $type . '"', '<div class="navfooter">', TRUE, FALSE);
        $prev = getSubstring($buffer, '<link rel="prev" href="', '" title', FALSE, FALSE);

        if (!empty($prev)) {
            $prev = '<a accesskey="p" href="' . $prev . '">Prev</a>';
        }
    } else {
        // The index page has no previous page; its content runs from the
        // title page div up to the first <hr>, and the div is closed here.
        $content = getSubstring($buffer, '<div class="titlepage"', '<hr>', TRUE, FALSE) . "\n</div>\n";
    }

    $next = getSubstring($buffer, '<link rel="next" href="', '" title', FALSE, FALSE);
    if (!empty($next)) {
        $next = '<a accesskey="n" href="' . $next . '">Next</a>';
    }

    $buffer = str_replace(
        array('{title}', '{content}', '{toc}', '{editions}', '{prev}', '{next}'),
        array($title, $content, $toc, $editions, $prev, $next),
        $template
    );

    if (function_exists('tidy_repair_string')) {
        $buffer = tidy_repair_string($buffer, array('indent' => TRUE, 'output-xhtml' => TRUE, 'wrap' => 0), 'utf8');
    }

    file_put_contents($file, $buffer);
}
/**
 * Currently a no-op: the function returns immediately, leaving $header and
 * $footer untouched. The statements after the return are unreachable; they
 * would tidy header and footer together and split them again on a marker.
 *
 * @param string $header Layout header markup (passed by reference).
 * @param string $footer Layout footer markup (passed by reference).
 */
function TidyLayout(&$header, &$footer)
{
    return;

    // ---- unreachable from here on ----
    global $tidyconfig;

    print "<!-- TIDYLAYOUT \n" . $header . "\n\n" . $footer;

    $marker = "%%SNIP%%";
    $tidied = trim(tidy_repair_string($header . $marker . $footer, $tidyconfig));
    $cut = strpos($tidied, $marker);

    $header = substr($tidied, 0, $cut);
    $footer = substr($tidied, $cut + strlen($marker));

    print "\n\n" . $header . "\n\n" . $footer . "\n-->";
}
/**
 * Converts GBK input to UTF-8, repairs it with Tidy, loads it into phpQuery
 * and dumps the date/tag/number cells of every table row from the fourth
 * row onwards.
 *
 * @param string $data Raw GBK-encoded HTML.
 */
private function parse($data)
{
    $utf8 = iconv('gbk', 'utf-8', $data);
    $repaired = tidy_repair_string($utf8);

    // Loading the document makes it the one queried by pq() below.
    phpQuery::newDocument($repaired);

    // Output key => zero-based column index within the row.
    $columns = array('date' => 0, 'tag' => 2, 'number' => 3);

    foreach (pq('tr') as $rowIndex => $row) {
        // The first three rows are skipped.
        if ($rowIndex >= 3) {
            $item = array();
            foreach ($columns as $field => $column) {
                $item[$field] = pq($row)->find('td:eq(' . $column . ')')->text();
            }
            var_dump($item);
        }
    }
}
/**
 * Converts an HTML fragment to a BBCode-like text.
 *
 * The input is unslashed, newlines are turned into <br />, the markup is
 * normalised with Tidy, links become [url] tags, and <br>/<p> become CRLF
 * line breaks. The result is HTML-escaped.
 *
 * @param string $html HTML fragment.
 *
 * @return string Escaped BBCode text.
 */
function html2bb($html = '')
{
    $html = nl2br(trim(stripslashes($html)));

    $tidyConfig = array(
        'output-xhtml' => true,
        'show-body-only' => true,
        'doctype' => 'strict',
        'drop-font-tags' => true,
        'drop-proprietary-attributes' => true,
        'lower-literals' => true,
        'quote-ampersand' => true,
        'wrap' => 0,
    );
    $html = trim(tidy_repair_string($html, $tidyConfig, 'utf8'));

    // Links with text, anchors without href, then empty links.
    $html = preg_replace('!<a(.*)href=(.+)>(.+)</a>!isU', '[url=$2]$3[/url]', $html);
    $html = preg_replace('!<a(.*)>(.+)</a>!isU', '$2', $html);
    $html = preg_replace('!<a(.*)href=(.+)></a>!isU', '[url]$2[/url]', $html);

    // Line breaks must be real CR/LF characters; a single-quoted '\r\n'
    // would insert the four literal characters backslash-r-backslash-n.
    $html = preg_replace('!<br(.*)>!isU', "\r\n", $html);
    $html = str_replace(array('<p>', '</p>'), "\r\n", $html);

    return htmlspecialchars($html);
}
/**
 * Repairs HTML in-process through the tidy extension, avoiding the cost of
 * spawning an external process.
 *
 * @param string $text HTML to check
 * @param bool $stderr Must be false; reading the result from the error
 *   status is not supported here
 * @param int &$retval Set to 0 on success, -1 when tidy fails
 * @return string|null Repaired HTML, or null when tidy fails
 */
protected function cleanWrapped($text, $stderr = false, &$retval = null)
{
    if ($stderr) {
        throw new Exception("\$stderr cannot be used with RaggettInternalHHVM");
    }

    $repaired = tidy_repair_string($text, $this->config['tidyConfigFile'], 'utf8');

    if ($repaired !== false) {
        $retval = 0;

        return $repaired;
    }

    $retval = -1;

    return null;
}
/**
 * Extracts a single purchase (date, place, amount) from the third <p>
 * element of the tidied message body.
 *
 * @param Message $message
 *
 * @return Purchase[]
 */
protected function parseData(Message $message)
{
    $dom = new \DOMDocument();
    $dom->loadHTML(tidy_repair_string($message->getMessageBody(true)));
    $paragraphs = $dom->getElementsByTagName('p');

    $purchase = new Purchase\Purchase();

    // All three fields are read from the third paragraph.
    $text = $paragraphs->item(2)->nodeValue;

    // Date: day/month/year, reordered to year-month-day.
    $dateMatch = [];
    preg_match('/((?:(?:[0-2]?\\d{1})|(?:[3][01]{1}))[-:\\/.](?:[0]?[1-9]|[1][012])[-:\\/.](?:(?:[1]{1}\\d{1}\\d{1}\\d{1})|(?:[2]{1}\\d{3})))(?![\\d])/', $text, $dateMatch);
    $purchase->date = new Date(implode('-', array_reverse(explode('/', $dateMatch[1]))));

    // Place: newlines become spaces and dots become commas before matching,
    // so the place ends at the first comma or former dot.
    $placeMatch = [];
    $flattened = str_replace(array("\n", '.'), array(' ', ','), $text);
    preg_match('/local: (.*?),/', $flattened, $placeMatch);
    $purchase->place = trim($placeMatch[1]);

    // Amount: the text between "R$ " and ", no dia".
    $amountMatch = [];
    preg_match('/R\\$ (.*), no dia/', $text, $amountMatch);
    $currency = new Currency($amountMatch[1]);
    $purchase->amount = $currency->getValue();

    return [$purchase];
}
/**
 * Runs the given text through Tidy when it contains markup and the Tidy
 * extension is loaded; otherwise returns the text unchanged.
 *
 * @author Hannes Gassert <hannes at mediagonal dot ch>
 * @param string $text text to be filtered
 * @return string the filtered text
 */
function filter($text)
{
    // Text without any '<' cannot contain tags, so there is nothing to do.
    $hasMarkup = strpos($text, '<') !== false;

    if (!$hasMarkup || !function_exists('tidy_repair_string')) {
        return $text;
    }

    /// Configuration for tidy. Feel free to tune for your needs, e.g. to allow
    /// proprietary markup.
    $tidyoptions = array(
        'output-xhtml' => true,
        'show-body-only' => true,
        'tidy-mark' => false,
        'drop-proprietary-attributes' => true,
        'drop-font-tags' => true,
        'drop-empty-paras' => true,
        'indent' => true,
        'quiet' => true,
    );

    return tidy_repair_string($text, $tidyoptions, 'utf8');
}
/**
 * Converts an HTML fragment to a BBCode-like text.
 *
 * The input is unslashed and normalised with Tidy, links become [url] tags,
 * <br> and <p> become CRLF line breaks and </p> is dropped. The result is
 * returned unescaped.
 *
 * @param string $html HTML fragment.
 *
 * @return string BBCode text.
 */
function html2bb($html = '')
{
    // The unused copy of the input ($old) kept by the previous version has
    // been removed; it was never read.
    $html = trim(stripslashes($html));

    $tidyConfig = array(
        'output-xhtml' => true,
        'show-body-only' => true,
        'doctype' => 'strict',
        'drop-font-tags' => true,
        'drop-proprietary-attributes' => true,
        'lower-literals' => true,
        'quote-ampersand' => true,
        'wrap' => 0,
    );
    $html = trim(tidy_repair_string($html, $tidyConfig, 'utf8'));

    $html = preg_replace('!<a(.*)href=(.+)>(.+)</a>!isU', '[url=$2]$3[/url]', $html);
    // NOTE(review): both alternatives in each (<|<) / (>|>) group are
    // identical, so this pattern matches the same text as the one above.
    // Possibly one alternative was meant to be an HTML entity — confirm.
    $html = preg_replace('!(<|<)a(.*)href=(.+)(>|>)(.+)(<|<)/a(>|>)!isU', '[url=$3]$5[/url]', $html);
    $html = preg_replace('!<a(.*)>(.+)</a>!isU', '$2', $html);
    $html = preg_replace('!<a(.*)href=(.+)></a>!isU', '[url]$2[/url]', $html);
    $html = preg_replace('!(<|<)br(.*)(>|>)!isU', "\r\n", $html);
    $html = str_replace('<p>', "\r\n", $html);
    $html = str_replace('</p>', "", $html);

    return $html;
}
/**
 * Entry point of the task: validates the attributes, loads the XML file,
 * applies the languages and writes the document back to the same file,
 * pretty-printed through Tidy when the extension is installed.
 *
 * @return void
 * @throws BuildException
 */
public function main()
{
    // Validate the supplied attributes.
    $this->checkDir();
    $this->checkFile();

    // Load the file and set the languages on it.
    $xml = $this->setLanguages(simplexml_load_file($this->file));

    // Without Tidy, let SimpleXML write the file directly.
    if (!function_exists('tidy_repair_string')) {
        $xml->asXML($this->file);

        return;
    }

    $tidyOptions = array(
        'output-xml' => true,
        'input-xml' => true,
        'indent' => true,
        'indent-spaces' => 4,
        'wrap' => 0,
        'vertical-space' => true,
        'output-bom' => false,
        'newline' => 'LF',
        'char-encoding' => 'utf8',
    );

    file_put_contents($this->file, tidy_repair_string($xml->asXML(), $tidyOptions));
}
/**
 * Repairs the intro and full text of an article in place.
 *
 * Does nothing unless the current request option is com_content and the
 * article has intro or full text. Uses Tidy when available, otherwise the
 * closetags() helper from closeTags.php.
 *
 * @param object $article Article whose introtext/fulltext are repaired.
 * @param mixed  $easyset Not used by this function.
 */
function tidyRepair($article, $easyset)
{
    if (JRequest::getVar('option') != 'com_content') {
        return;
    }

    if (!$article->introtext && !$article->fulltext) {
        return;
    }

    $fields = array('introtext', 'fulltext');

    if (function_exists('tidy_repair_string')) {
        $tidyOptions = array('indent' => TRUE, 'output-xhtml' => true, 'show-body-only' => true, 'wrap' => false);

        foreach ($fields as $field) {
            $article->{$field} = tidy_repair_string($article->{$field}, $tidyOptions, 'utf8');
        }

        return;
    }

    // Fallback when the Tidy extension is missing.
    require_once 'closeTags.php';

    foreach ($fields as $field) {
        $article->{$field} = closetags($article->{$field});
    }
}
/**
 * Fetches a URL, converts the markup to XHTML with Tidy and loads it into a
 * DOMDocument, positioning the current node at the document element.
 *
 * @param string $url Address of the page to load.
 *
 * @throws Exception When the URL cannot be retrieved or parsed.
 */
public function __construct($url)
{
    $html = file_get_contents($url);
    if (false === $html) {
        throw new Exception("Can't retrieve {$url}");
    }

    /* Turn the HTML into valid XHTML */
    $clean = tidy_repair_string($html, array('output-xhtml' => true));

    /* Load it into a DOMDocument, hiding any libxml warnings. The previous
     * error-handling mode is restored before anything can be thrown, so a
     * parse failure does not leave libxml in internal-error mode and a
     * caller that had enabled it keeps its setting. */
    $this->doc = new DOMDocument();
    $previousMode = libxml_use_internal_errors(true);
    $loaded = $this->doc->loadHTML($clean);
    libxml_clear_errors();
    libxml_use_internal_errors($previousMode);

    if (false === $loaded) {
        throw new Exception("Can't parse {$url} as HTML");
    }

    $this->currentNode = $this->doc->documentElement;
    $this->x = new DOMXPath($this->doc);
}
/**
 * Returns the given HTML reduced to the nodes that copyNodes() carries over
 * from the tidied input into a fresh document.
 *
 * @param string $html Input HTML.
 *
 * @return string Serialized children of the cleaned body, trimmed.
 */
public function strip($html)
{
    /* Have Tidy produce XHTML so the markup can be parsed as XML */
    $xhtml = tidy_repair_string($html, array('output-xhtml' => true));

    /* Source document holding the unfiltered markup */
    $source = new DOMDocument();
    $source->loadXml($xhtml);
    $sourceBody = $source->getElementsByTagName('body')->item(0);

    /* Target document that starts with an empty body */
    $target = new DOMDocument();
    $targetBody = $target->appendChild($target->createElement('body'));

    /* Transfer the permitted nodes */
    $this->copyNodes($sourceBody, $targetBody);

    /* Serialize each child of the target body and join the pieces */
    $pieces = array();
    foreach ($targetBody->childNodes as $child) {
        $pieces[] = $target->saveXml($child);
    }

    return trim(implode('', $pieces));
}
/**
 * Normalises an HTML fragment with Tidy and returns the markup of the
 * resulting body's child nodes.
 *
 * The input is returned unchanged when the Tidy extension is missing, when
 * repairing or parsing fails, or when the body has no child nodes.
 *
 * @param string $html HTML fragment.
 *
 * @return string Normalised markup, or the original input as a fallback.
 */
function html_standardization($html)
{
    if (!function_exists('tidy_repair_string')) {
        return $html;
    }

    $options = array('output-xhtml' => true);

    $repaired = tidy_repair_string($html, $options, 'utf8');
    if (!$repaired) {
        return $html;
    }

    $document = tidy_parse_string($repaired, $options, 'utf8');
    if (!$document) {
        return $html;
    }

    // Check each step explicitly instead of silencing errors with "@".
    $body = tidy_get_body($document);
    $nodes = ($body !== null) ? $body->child : null;
    if (!is_array($nodes)) {
        return $html;
    }

    $standard_html = '';
    foreach ($nodes as $node) {
        $standard_html .= $node->value;
    }

    return $standard_html;
}
/**
 * Repairs an HTML source with Tidy, then normalises it: line breaks are
 * removed (converted to <br /> inside <pre>), single-quoted attribute values
 * become double-quoted and whitespace at the edges of attribute values is
 * dropped. Requires the Tidy PHP extension.
 *
 * @param string $html Input HTML source
 * @return string
 */
public static function repair($html)
{
    static $config = array(
        'newline' => 'LF',
        'indent' => false,
        'output-xhtml' => true,
        'output-bom' => false,
        'doctype' => 'auto',
        'bare' => true,
        'wrap' => 0,
        'wrap-sections' => false,
        'enclose-text' => true,
        'merge-divs' => false,
        'merge-spans' => false,
        'force-output' => true,
        'show-errors' => 0,
        'show-warnings' => false,
        'escape-cdata' => true,
        'preserve-entities' => true,
    );

    $html = tidy_repair_string($html, $config, 'utf8');

    // Applied in order, before line breaks inside <pre> are converted:
    //  - drop the xml:namespace declarations generated by MS Word
    //  - drop the line break right after an opening <pre> tag
    //  - drop the line break right before a closing </pre> tag
    $beforePre = array(
        '~<\\?xml:namespace[^>]*>~i' => '',
        "~(<pre[^>]*>)\n~" => '\\1',
        "~\n</pre>~" => '</pre>',
    );
    foreach ($beforePre as $pattern => $replacement) {
        $html = preg_replace($pattern, $replacement, $html);
    }

    // Keep the remaining line breaks of <pre> content as <br /> elements
    $html = preg_replace_callback('~(<pre[^>]*>)(.+?)(</pre>)~s', function ($matches) {
        $inner = strtr(nl2br($matches[2]), array('\\"' => '"'));

        return $matches[1] . $inner . $matches[3];
    }, $html);

    // Remove every remaining line break
    $html = str_replace(array("\r", "\n"), '', $html);

    // Applied in order (for easier processing later):
    //  - single-quoted attribute values become double-quoted
    //  - whitespace right after the opening quote of a value is removed
    //  - whitespace right before the closing quote of a value is removed
    $attributeRules = array(
        '~(<[a-z][a-z0-9]*[^>]+[a-z]+=)\'([^\']*)\'~i' => '\\1"\\2"',
        '~(<[a-z][a-z0-9]*[^>]+[a-z]+=")\\s+([^"]*")~i' => '\\1\\2',
        '~(<[a-z][a-z0-9]*[^>]+[a-z]+="[^"]*)\\s+(")~i' => '\\1\\2',
    );
    foreach ($attributeRules as $pattern => $replacement) {
        $html = preg_replace($pattern, $replacement, $html);
    }

    return $html;
}
/**
 * Displays the translation page stored under the key given in $_GET['key'].
 *
 * Looks the value up in TRANSLATION_PAGE_TABLE, tidies it when the Tidy
 * extension is loaded, and renders it with TranslationPageTemplate.html.
 * Missing keys, query failures and unknown keys raise E_USER_ERROR.
 *
 * @param array $params Action parameters (not used here).
 *
 * @return bool Always true when the page was displayed.
 */
function handle($params)
{
    $app =& Dataface_Application::getInstance();

    if (!isset($_GET['key'])) {
        trigger_error("No key specified", E_USER_ERROR);
    }

    // The key comes straight from the request: escape it with the
    // connection-aware function rather than addslashes(), which does not
    // take the connection character set into account.
    $key = mysql_real_escape_string($_GET['key'], $app->db());
    $sql = "select `value` from `" . TRANSLATION_PAGE_TABLE . "` where `key` = '" . $key . "'";

    $res = mysql_query($sql, $app->db());
    if (!$res) {
        trigger_error(mysql_error($app->db()), E_USER_ERROR);
    }
    if (mysql_num_rows($res) == 0) {
        trigger_error("Sorry the specified key was invalid.", E_USER_ERROR);
    }

    list($content) = mysql_fetch_row($res);
    @mysql_free_result($res);

    if (function_exists('tidy_repair_string')) {
        $config = array('show-body-only' => true, 'output-encoding' => 'utf8');
        $content = trim(tidy_repair_string($content, $config, "utf8"));
    }

    df_display(array('content' => $content), 'TranslationPageTemplate.html');

    return true;
}
/**
 * Render a template, but before doing so, call the action file and render
 * the header->view->footer in that order.
 *
 * The preprocessor, action, page menu, header, view and footer scripts are
 * all pulled in with include, so they execute in this method's local scope
 * and can see its local variables (including those created by extract()).
 * Local variable names in this method are therefore part of what those
 * scripts may rely on.
 *
 * @param array $dataArr Key=>value pairs of variables to be exposed to the template as globals.
 * @access public
 */
public function render(array $dataArr = array())
{
    // GZip compression.
    if (Flux::config('GzipCompressOutput')) {
        // NOTE(review): Accept-Encoding is normally a request header; confirm
        // that sending it in the response is intended.
        header('Accept-Encoding: gzip');
        ini_set('zlib.output_handler', '');
        ini_set('zlib.output_compression', 'On');
        ini_set('zlib.output_compression_level', (int) Flux::config('GzipCompressionLevel'));
    }
    // Resolve the action script: the module directory first, then any addon
    // that responds to this module/action, finally the "missing action" page.
    $addon = false;
    $this->actionPath = sprintf('%s/%s/%s.php', $this->modulePath, $this->moduleName, $this->actionName);
    if (!file_exists($this->actionPath)) {
        // No break in this loop: if several addons respond, the last one wins.
        foreach (Flux::$addons as $_tmpAddon) {
            if ($_tmpAddon->respondsTo($this->moduleName, $this->actionName)) {
                $addon = $_tmpAddon;
                $this->actionPath = sprintf('%s/%s/%s.php', $addon->moduleDir, $this->moduleName, $this->actionName);
            }
        }
        if (!$addon) {
            $this->moduleName = $this->missingActionModuleAction[0];
            $this->actionName = $this->missingActionModuleAction[1];
            $this->viewName = $this->missingActionModuleAction[1];
            $this->actionPath = sprintf('%s/%s/%s.php', $this->modulePath, $this->moduleName, $this->actionName);
        }
    }
    // Not read again in this method; included scripts can still see it.
    $viewExists = false;
    // Resolve the view script from the addon's theme directory when an addon
    // supplied the action, otherwise from the regular theme path. Fall back
    // to the "missing view" module/action when the file does not exist.
    $this->viewPath = sprintf('%s/%s/%s/%s.php', $addon ? $addon->themeDir : $this->themePath, $this->themeName, $this->moduleName, $this->actionName);
    if (!file_exists($this->viewPath)) {
        $this->moduleName = $this->missingViewModuleAction[0];
        $this->actionName = $this->missingViewModuleAction[1];
        $this->viewName = $this->missingViewModuleAction[1];
        $this->actionPath = sprintf('%s/%s/%s.php', $this->modulePath, $this->moduleName, $this->actionName);
        $this->viewPath = sprintf('%s/%s/%s/%s.php', $this->themePath, $this->themeName, $this->moduleName, $this->viewName);
    }
    $this->headerPath = sprintf('%s/%s/%s.php', $this->themePath, $this->themeName, $this->headerName);
    $this->footerPath = sprintf('%s/%s/%s.php', $this->themePath, $this->themeName, $this->footerName);
    // URL of the current module/action, with and without the query string.
    $this->url = $this->url($this->moduleName, $this->actionName);
    $this->urlWithQS = $this->url;
    if (!empty($_SERVER['QUERY_STRING'])) {
        if ($this->useCleanUrls) {
            $this->urlWithQS .= "?{$_SERVER['QUERY_STRING']}";
        } else {
            // Re-append every query parameter except "module" and "action".
            foreach (explode('&', trim($_SERVER['QUERY_STRING'], '&')) as $line) {
                // NOTE(review): a parameter without "=" yields a one-element
                // array here, leaving $val unset — confirm this is acceptable.
                list($key, $val) = explode('=', $line, 2);
                $key = urldecode($key);
                $val = urldecode($val);
                if ($key != 'module' && $key != 'action') {
                    $this->urlWithQS .= sprintf('&%s=%s', urlencode($key), urlencode($val));
                }
            }
        }
    }
    // Compatibility.
    $this->urlWithQs = $this->urlWithQS;
    // Tidy up!
    // Output is buffered here (and tidied at the end of the method) unless
    // the current module/action is listed in the TidyIgnore configuration.
    if (Flux::config('OutputCleanHTML')) {
        $dispatcher = Flux_Dispatcher::getInstance();
        $tidyIgnore = false;
        if (($tidyIgnores = Flux::config('TidyIgnore')) instanceof Flux_Config) {
            foreach ($tidyIgnores->getChildrenConfigs() as $ignore) {
                $ignore = $ignore->toArray();
                if (is_array($ignore) && array_key_exists('module', $ignore)) {
                    $module = $ignore['module'];
                    // An ignore entry without an action applies to the dispatcher's default action.
                    $action = array_key_exists('action', $ignore) ? $ignore['action'] : $dispatcher->defaultAction;
                    if ($this->moduleName == $module && $this->actionName == $action) {
                        $tidyIgnore = true;
                    }
                }
            }
        }
        if (!$tidyIgnore) {
            ob_start();
        }
    }
    // Merge with default data.
    $data = array_merge($this->defaultData, $dataArr);
    // Extract data array and make them appear as though they were global
    // variables from the template.
    extract($data, EXTR_REFS);
    // Files object.
    $files = new Flux_Config($_FILES);
    // Optional preprocessor script, run before the action.
    $preprocessorPath = sprintf('%s/main/preprocess.php', $this->modulePath);
    if (file_exists($preprocessorPath)) {
        include $preprocessorPath;
    }
    include $this->actionPath;
    $pageMenuFile = FLUX_ROOT . "/modules/{$this->moduleName}/pagemenu/{$this->actionName}.php";
    $pageMenuItems = array();
    // Get the main menu file first (located in the actual module).
    // The file's return value is used; anything it prints is discarded.
    if (file_exists($pageMenuFile)) {
        ob_start();
        $pageMenuItems = (include $pageMenuFile);
        ob_end_clean();
    }
    // Merge in the page menu items returned by matching addon files.
    $addonPageMenuFiles = glob(FLUX_ADDON_DIR . "/*/modules/{$this->moduleName}/pagemenu/{$this->actionName}.php");
    if ($addonPageMenuFiles) {
        foreach ($addonPageMenuFiles as $addonPageMenuFile) {
            ob_start();
            $pageMenuItems = array_merge($pageMenuItems, include $addonPageMenuFile);
            ob_end_clean();
        }
    }
    // Header and footer are optional; the view is always included.
    if (file_exists($this->headerPath)) {
        include $this->headerPath;
    }
    include $this->viewPath;
    if (file_exists($this->footerPath)) {
        include $this->footerPath;
    }
    // Really, tidy up!
    // $tidyIgnore is only defined when OutputCleanHTML is on; the first
    // condition short-circuits before it is read otherwise. When the Tidy
    // extension is missing, the buffer started above is not collected here.
    if (Flux::config('OutputCleanHTML') && !$tidyIgnore && function_exists('tidy_repair_string')) {
        $content = ob_get_clean();
        $content = tidy_repair_string($content, array('indent' => true, 'wrap' => false, 'output-xhtml' => true), 'utf8');
        echo $content;
    }
}
<?php
// Repairs a small fragment with Tidy (body only, no clean-up pass, "\n" as
// the newline option) and dumps the result.
$tidyOptions = array(
    'show-body-only' => true,
    'clean' => false,
    'newline' => "\n",
);
$repaired = tidy_repair_string("<p>abracadabra</p>", $tidyOptions);
var_dump($repaired);
<?php
// Sample markup: a short list of links.
$html = <<<_HTML_
<p>Some things I enjoy eating are:</p>
<ul>
<li><a href="http://en.wikipedia.org/wiki/Pickle">Pickles</a></li>
<li><a href="http://www.eatingintranslation.com/2011/03/great_ny_noodle.html">Salt-Baked Scallops</a></li>
<li><a href="http://www.thestoryofchocolate.com/">Chocolate</a></li>
</ul>
_HTML_;

// Convert the markup to XHTML with Tidy so it can be loaded as XML.
$tidyOptions = array('output-xhtml' => true, 'wrap' => 0, 'numeric-entities' => true);
$document = new DOMDocument();
$document->loadXML(tidy_repair_string($html, $tidyOptions));

// Elements live in the XHTML namespace, so bind a prefix for the query.
$finder = new DOMXPath($document);
$finder->registerNamespace('xhtml', 'http://www.w3.org/1999/xhtml');

// Print "text -> href" for every link in the document.
foreach ($finder->query('//xhtml:a') as $anchorNode) {
    $label = trim($anchorNode->textContent);
    $target = $anchorNode->getAttribute('href');
    echo $label . ' -> ' . $target . "\n";
}
unlink($file); } $db = new PDO('sqlite:' . $file); $db->exec('CREATE TABLE caffeine (name VARCHAR(255), link VARCHAR(255))'); $db->exec('CREATE UNIQUE INDEX caffeine_name ON caffeine (name)'); $insert = $db->prepare('INSERT INTO caffeine (name, link) VALUES (:name, :link)'); // Get raw energyfiend.com data set echo 'Downloading energyfiend.com data set', PHP_EOL; $file = __DIR__ . '/the-caffeine-database.html'; if (!file_exists($file)) { copy('http://www.energyfiend.com/the-caffeine-database', $file); } $contents = file_get_contents($file); // Extract data from data set echo 'Processing energyfiend.com data', PHP_EOL; $contents = tidy_repair_string($contents); libxml_use_internal_errors(true); $doc = new DOMDocument(); $doc->loadHTML($contents); libxml_clear_errors(); $xpath = new DOMXPath($doc); $caffeine = $xpath->query('//table[@id="caffeinedb"]//tr/td[1]'); $db->beginTransaction(); foreach ($caffeine as $drink) { $name = iconv('UTF-8', 'ISO-8859-1//TRANSLIT', $drink->textContent); $name = preg_replace('/\\s*\\v+\\s*/', ' ', $name); if ($drink->firstChild->nodeName == 'a') { $link = 'http://energyfiend.com' . $drink->firstChild->getAttribute('href'); } else { $link = null; }
/**
 * Create a PDF representation of an invoice.
 *
 * Loads the invoice record for the subscription ID held in the model state,
 * repairs its HTML with Tidy when available, renders it with TCPDF, writes
 * the PDF under a hashed file name and stores that name on the record. A
 * previously generated file for the same record is deleted.
 *
 * @return string|false The (mangled) filename of the PDF file, or false when
 *                      the invoice record is missing or the file could not
 *                      be written
 */
public function createPDF()
{
    // Get the subscription ID from the model's state
    $akeebasubs_subscription_id = $this->getId();

    // Fetch the invoice record (including its HTML) from the database
    $db = $this->getDbo();
    $query = $db->getQuery(true)
        ->select('*')
        ->from($db->qn('#__akeebasubs_invoices'))
        ->where($db->qn('extension') . ' = ' . $db->q('akeebasubs'))
        ->where($db->qn('akeebasubs_subscription_id') . ' = ' . $db->q($akeebasubs_subscription_id));
    $db->setQuery($query, 0, 1);
    $invoiceRecord = $db->loadObject();

    // Nothing to render when no invoice exists for this subscription
    if (!is_object($invoiceRecord)) {
        return false;
    }

    // Repair the input HTML
    if (function_exists('tidy_repair_string')) {
        $tidyConfig = array(
            'bare' => 'yes',
            'clean' => 'yes',
            'drop-proprietary-attributes' => 'yes',
            'output-html' => 'yes',
            'show-warnings' => 'no',
            'ascii-chars' => 'no',
            'char-encoding' => 'utf8',
            'input-encoding' => 'utf8',
            'output-bom' => 'no',
            'output-encoding' => 'utf8',
            'force-output' => 'yes',
            'tidy-mark' => 'no',
            'wrap' => 0,
        );
        $repaired = tidy_repair_string($invoiceRecord->html, $tidyConfig, 'utf8');

        // Keep the original HTML when Tidy fails
        if ($repaired !== false) {
            $invoiceRecord->html = $repaired;
        }
    }

    // Fix any relative URLs in the HTML
    $invoiceRecord->html = $this->fixURLs($invoiceRecord->html);

    // Create the PDF
    $pdf = $this->getTCPDF();
    $pdf->AddPage();
    $pdf->writeHTML($invoiceRecord->html, true, false, true, false, '');
    $pdf->lastPage();
    $pdfData = $pdf->Output('', 'S');
    unset($pdf);

    // Write the PDF data to disk using JFile::write();
    JLoader::import('joomla.filesystem.file');

    // Random component of the file name hash
    if (function_exists('openssl_random_pseudo_bytes')) {
        $rand = openssl_random_pseudo_bytes(16);

        if ($rand === false) {
            // Broken or old system
            $rand = mt_rand();
        }
    } else {
        $rand = mt_rand();
    }

    $hashThis = serialize($invoiceRecord) . microtime() . $rand;

    // Use the strongest available digest. This must be a single
    // if/elseif/else chain: with two independent ifs the SHA-256 result
    // would always be overwritten by SHA-1.
    if (function_exists('hash')) {
        $hash = hash('sha256', $hashThis);
    } elseif (function_exists('sha1')) {
        $hash = sha1($hashThis);
    } else {
        $hash = md5($hashThis);
    }

    $name = $hash . '_' . $invoiceRecord->invoice_no . '.pdf';
    $path = JPATH_ADMINISTRATOR . '/components/com_akeebasubs/invoices/';

    $ret = JFile::write($path . $name, $pdfData);

    if (!$ret) {
        return false;
    }

    // Delete the old invoice file
    $oldName = $invoiceRecord->filename;

    if (JFile::exists($path . $oldName)) {
        JFile::delete($path . $oldName);
    }

    // Update the invoice record
    $invoiceRecord->filename = $name;
    $db->updateObject('#__akeebasubs_invoices', $invoiceRecord, 'akeebasubs_subscription_id');

    // Return the name of the file
    return $name;
}
<?php
// Malformed input: text directly inside <html>, followed by a stray <body>.
$markup = <<<EOD
<html>me</html><body>me</body>
EOD;

// Print whatever Tidy makes of it, using the default configuration.
$repaired = tidy_repair_string($markup);
echo $repaired;
$this->state->authenticated = true; } public function addEntry($databaseURI, $content, $contentType, $cuid) { echo "Adding {$cuid} of {$contentType} to {$databaseURI}:\n{$content}\n"; } public function replaceEntry($databaseURI, $content, $contentType, $cuid) { echo "Replacing {$cuid} of {$contentType} in {$databaseURI}:\n{$content}\n"; } public function deleteEntry($databaseURI, $cuid) { echo "Deleting {$cuid} from {$databaseURI}\n"; } } if (!isset($argc)) { die("argv/argc has to be enabled.\n"); } if ($argc != 2) { die('Usage: ' . basename($argv[0]) . " syncml_client_nn.[wb]xml\n"); } $backend = new Backend(array()); $sync = new Horde_SyncMl_ContentHandler(); $sync->debug = true; $sync->process(file_get_contents($argv[1]), strpos($argv[1], '.wbxml') ? 'application/vnd.syncml+wbxml' : 'application/vnd.syncml'); $output = $sync->getOutput(); if (function_exists('tidy_repair_string')) { $output = tidy_repair_string($output, array('indent' => true, 'input-xml' => true, 'output-xml' => true)); } echo $output, "\n"; @session_destroy();
/**
 * Loads HTML content into the wrapped document, optionally handling its
 * encoding and running it through Tidy first.
 *
 * @param string      $content  HTML source.
 * @param string|null $encoding Encoding passed on to handleEncoding().
 */
protected function populate($content, $encoding = null)
{
    // Encoding handling is optional and controlled by a flag.
    if ($this->handleEncoding) {
        $this->handleEncoding($content, $encoding);
    }

    // Tidy pass, only when enabled and the extension is loaded.
    $canTidy = $this->tidyEnabled && function_exists('tidy_repair_string');
    if ($canTidy) {
        $content = tidy_repair_string($content, $this->tidyConfig, 'raw');
    }

    // The libxml flags argument is only passed on PHP 5.4 and later.
    if (PHP_VERSION_ID < 50400) {
        $this->document->loadHTML($content); // @codeCoverageIgnore
    } else {
        $this->document->loadHTML($content, $this->libxmlFlags);
    }
}
/**
 * @brief Assemble the document and optionally tidy it up.
 *
 * Ends buffering, then concatenates doctype, head, body and foot. When
 * $tidy is truthy and the Tidy extension is loaded, the result is repaired
 * with Tidy and the doctype is prepended again.
 *
 * @param bool $tidy Whether to run the document through Tidy.
 * @return string The document markup.
 */
public function getContent($tidy = false)
{
    $this->endBuffer();

    $out = $this->doctype . _NL_ . $this->getDocumentHead() . (string) $this->body . $this->getDocumentFoot();

    if (!$tidy || !function_exists('tidy_parse_string')) {
        return $out;
    }

    $config = array(
        'indent' => true,
        // Tidy is told to omit the doctype; it is re-added below.
        'doctype' => 'omit',
        'output-xhtml' => $this->xhtml,
        'input-encoding' => $this->charset,
        'output-encoding' => $this->charset,
        'drop-empty-paras' => false,
        'language' => $this->lang,
        'vertical-space' => false,
        'wrap' => 100,
    );

    // Tidy's encoding argument is the charset lower-cased with spaces removed.
    $encoding = strtolower(str_replace(' ', '', $this->charset));
    $repaired = tidy_repair_string($out, $config, $encoding);

    $out = $this->doctype . "\n" . (string) $repaired . _NL_;

    // Remove the line break placed directly before closing script tags.
    return str_replace(">\n</script>", "></script>", $out);
}
/**
 * Sets up the client: connector, events, roster, keep-alive timer, the
 * default event handlers, packet logging and the stanza class bindings.
 *
 * @param Jid                         $jid       Clients JID
 * @param string                      $password  Account Password
 * @param Connector\AbstractConnector $connector Connector to use; a
 *                                               TcpConnector to the JID's
 *                                               server when omitted
 */
public function __construct(Jid $jid, $password, AbstractConnector $connector = null)
{
    $this->connector = $connector ? $connector : new TcpConnector($jid->server);
    $this->connector->client = $this;
    $this->jid = $jid;
    $this->password = $password;

    // One Event object per client event, created in this order.
    $eventNames = [
        'onPacket', 'onAuth', 'onStreamOpen', 'onReady', 'onTick', 'onPresence',
        'onMessage', 'onIq', 'onJoin', 'onLeave', 'onTls', 'onRoomJoin',
        'onRoomLeave', 'onNickChange', 'onConnect', 'onDisconnect',
    ];
    foreach ($eventNames as $eventName) {
        $this->{$eventName} = new Event();
    }

    $this->roster = new Roster($this);

    // The keep-alive timer is created stopped: it must not run before the
    // connection is finalized.
    $this->keepAliveTimer = new Timer(15, array($this, 'keepAliveTick'));
    $this->keepAliveTimer->stop();

    // Forward connector connect/disconnect to the client's own events.
    $this->connector->onConnect->add(function ($c) {
        $this->onConnect->run($this);
    });
    $this->connector->onDisconnect->add(function ($c) {
        $this->onDisconnect->run($this);
    });
    $this->connector->onReceive->add([$this, 'connector_onReceive']);
    $this->connector->onFeatures->add([$this, 'connector_onFeatures']);

    // Built-in handlers, registered in this order (onPacket gets two).
    $defaultHandlers = [
        ['onConnect', '_onConnect'],
        ['onAuth', '_onAuth'],
        ['onReady', '_onReady'],
        ['onPresence', '_onPresence'],
        ['onMessage', '_onMessage'],
        ['onTls', '_onTls'],
        ['onPacket', '_onPacket'],
        ['onPacket', '_handleExpected'],
        ['onDisconnect', '_onDisconnect'],
    ];
    foreach ($defaultHandlers as $pair) {
        $this->{$pair[0]}->add(array($this, $pair[1]));
    }

    // Debug logging of sent and received packets. The packet is
    // pretty-printed with Tidy when available; the logged length is that of
    // the packet before pretty-printing.
    $settings = ['indent' => true, 'input-xml' => true, 'output-xml' => true, 'drop-empty-paras' => false, 'wrap' => 0];
    $packetLogger = function ($template) use ($settings) {
        return function ($socket, $packet) use ($settings, $template) {
            $length = strlen($packet);

            if (function_exists('tidy_repair_string')) {
                $packet = trim(tidy_repair_string($packet, $settings));
            }

            if (isset($this->logger)) {
                $this->logger->debug($template, ['length' => $length, 'packet' => $packet]);
            }
        };
    };
    $this->connector->onSend->add($packetLogger("Sent {length} bytes: \n{packet}"));
    $this->connector->onReceive->add($packetLogger("Received {length} bytes: \n{packet}"));

    // Element name => class used for that stanza.
    $stanzaClasses = [
        'iq' => 'Kadet\\Xmpp\\Stanza\\Iq',
        'presence' => 'Kadet\\Xmpp\\Stanza\\Presence',
        'message' => 'Kadet\\Xmpp\\Stanza\\Message',
        'query' => 'Kadet\\Xmpp\\Stanza\\Iq\\Query',
    ];
    foreach ($stanzaClasses as $element => $class) {
        XmlBranch::$bind[$element] = $class;
    }
}