/**
 * Converts an HTML fragment to plain text by dumping it through the w3m browser.
 *
 * The markup is first rewritten with a few regular expressions (text emphasis,
 * sub/sup, image attributes, anchors), then written to a temporary file that
 * w3m renders as UTF-8 text, wrapped at $this->cols columns.
 *
 * @param string $html HTML markup to convert.
 *
 * @return string The text rendering, after the self::$charMap and
 *                self::$textAnchor substitutions have been applied.
 */
function convertData($html)
{
    // Style according to the Netiquette: bold becomes *text*, underline becomes _text_.
    // Whitespace next to a tag is moved outside of the marker so the marker sticks to the text.
    $html = preg_replace('#<(?:b|strong)\\b[^>]*>(\\s*)#iu', '$1*', $html);
    $html = preg_replace('#(\\s*)</(?:b|strong)\\b[^>]*>#iu', '*$1', $html);
    $html = preg_replace('#<u\\b[^>]*>(\\s*)#iu', '$1_', $html);
    $html = preg_replace('#(\\s*)</u\\b[^>]*>#iu', '_$1', $html);

    // Neutralize <sub> and <sup> tags by turning them into <span> (attributes are kept)
    $html = preg_replace('#<(/?)su[bp]\\b([^>]*)>#iu', '<$1span$2>', $html);

    // Fill empty alt attributes with whitespace, clear src attributes
    $html = preg_replace('#(<[^>]+\\balt=")"#iu', '$1 "', $html);
    $html = preg_replace('#(<[^>]+\\bsrc=")(?:[^"]*)"#iu', '$1"', $html);

    // Inline URLs: each anchor is handed to buildTextAnchor() of this class.
    // NOTE(review): buildTextAnchor() is not visible here; it presumably registers
    // the placeholders that are expanded from self::$textAnchor below - confirm.
    $html = preg_replace_callback(
        '#<a\\b[^>]*\\shref="([^"]*)"[^>]*>(.*?)</a\\b[^>]*>#isu',
        array(__CLASS__, 'buildTextAnchor'),
        $html
    );

    // Convert html-entities to UTF-8 for w3m.
    // Quotes and angle brackets, as well as their already-encoded forms, are escaped
    // one extra level first. html_entity_decode() then undoes exactly that extra
    // level for them, so the markup is left intact while every other entity is
    // decoded to its UTF-8 character. With ENT_COMPAT, single quotes stay as &#039;.
    // NOTE(review): FILTER::get() is defined elsewhere; its 'text' mode is assumed
    // to return the input as a normalized string - confirm.
    $html = str_replace(
        array('&quot;', '&lt;', '&gt;', '&#039;', '"', '<', '>', "'"),
        array('&amp;quot;', '&amp;lt;', '&amp;gt;', '&amp;#039;', '&quot;', '&lt;', '&gt;', '&#039;'),
        FILTER::get($html, 'text')
    );
    $html = html_entity_decode($html, ENT_COMPAT, 'UTF-8');

    // w3m reads its input from a file, so stage the markup in the cache directory
    $file = tempnam(PATCHWORK_ZCACHE, 'converter');
    Patchwork::writeFile($file, $html);

    $arg = escapeshellarg($file);
    $html = `w3m -dump -cols {$this->cols} -T text/html -I UTF-8 -O UTF-8 {$arg}`;

    // The staging file is not needed anymore once w3m has run
    unlink($file);

    // Post-process the dump: character substitutions, then anchor placeholders
    $html = str_replace(self::$charMap[0], self::$charMap[1], $html);
    $html = strtr($html, self::$textAnchor);

    // Reset the anchor table so the next conversion starts from a clean state
    self::$textAnchor = array();

    return $html;
}
/**
 * Converts in-memory data by staging it in a temporary file and delegating
 * the actual work to convertFile().
 *
 * @param mixed $data Content to convert; it is written to the temporary file as given.
 *
 * @return mixed Whatever convertFile() returns for the temporary file.
 */
function convertData($data)
{
    // Stage the payload in a uniquely named file created in the current directory
    $tmpPath = tempnam('.', 'converter');
    Patchwork::writeFile($tmpPath, $data);

    $converted = $this->convertFile($tmpPath);

    // The staging file has served its purpose once the conversion has run
    unlink($tmpPath);

    return $converted;
}
/**
 * Closes the adapter, then writes the flagged entries of self::$cache to disk.
 *
 * Each cache entry is keyed by its target file path and is read here as:
 *  - index 0: truthy when the entry has to be written (presumably a "modified" flag),
 *  - index 1: truthy when the file also has to be passed to writeWatchTable(),
 *  - index 2: the payload, stored in serialized form.
 */
static function __free()
{
    self::$adapter->close();

    foreach (self::$cache as $path => &$entry) {
        // Entries that are not flagged for writing are left alone
        if (!$entry[0]) {
            continue;
        }

        // writeFile() is given a variable rather than an expression: its signature
        // is not visible here and it may take this argument by reference.
        $serialized = serialize($entry[2]);
        p::writeFile($path, $serialized);

        if ($entry[1]) {
            p::writeWatchTable('translator', $path, false);
        }
    }

    // Drop the reference left behind by the by-reference iteration
    unset($entry);
}