Esempio n. 1
0
 /**
  * Converts an HTML fragment to plain text by dumping it through the external `w3m` program.
  *
  * Steps, in order:
  *  - bold / underline tags are replaced by the plain-text markers `*` and `_`;
  *  - <sub>/<sup> tags are renamed to <span>;
  *  - empty alt attributes are filled with a space and src attributes are emptied;
  *  - anchors are rewritten by the buildTextAnchor() callback
  *    (NOTE(review): presumably it registers placeholders in self::$textAnchor — confirm);
  *  - the markup is written to a temporary file in PATCHWORK_ZCACHE and rendered
  *    by `w3m -dump` using $this->cols columns;
  *  - self::$charMap and self::$textAnchor replacements are applied to the dump,
  *    and self::$textAnchor is reset to an empty array.
  *
  * @param string $html HTML markup to convert.
  *
  * @return string The text rendering, or an empty string when the temporary file
  *                cannot be created or w3m produces no output.
  */
 function convertData($html)
 {
     // Style according to the Netiquette: *bold* and _underline_, with the
     // whitespace adjacent to the tag moved outside of the marker
     $html = preg_replace('#<(?:b|strong)\\b[^>]*>(\\s*)#iu', '$1*', $html);
     $html = preg_replace('#(\\s*)</(?:b|strong)\\b[^>]*>#iu', '*$1', $html);
     $html = preg_replace('#<u\\b[^>]*>(\\s*)#iu', '$1_', $html);
     $html = preg_replace('#(\\s*)</u\\b[^>]*>#iu', '_$1', $html);

     // Turn <sub> and <sup> tags into <span>, keeping their attributes
     $html = preg_replace('#<(/?)su[bp]\\b([^>]*)>#iu', '<$1span$2>', $html);

     // Fill empty alt attributes with whitespace, clear src attributes
     $html = preg_replace('#(<[^>]+\\balt=")"#iu', '$1 "', $html);
     $html = preg_replace('#(<[^>]+\\bsrc=")(?:[^"]*)"#iu', '$1"', $html);

     // Inline URLs
     $html = preg_replace_callback('#<a\\b[^>]*\\shref="([^"]*)"[^>]*>(.*?)</a\\b[^>]*>#isu', array(__CLASS__, 'buildTextAnchor'), $html);

     // Convert html-entities to UTF-8 for w3m. The four special entities are
     // double-escaped and the raw quote / angle-bracket characters are escaped
     // once, so that html_entity_decode() gives them back unchanged while
     // decoding the other entities.
     $html = str_replace(array('&quot;', '&lt;', '&gt;', '&#039;', '"', '<', '>', "'"), array('&amp;quot;', '&amp;lt;', '&amp;gt;', '&amp;#039;', '&quot;', '&lt;', '&gt;', '&#039;'), FILTER::get($html, 'text'));
     $html = html_entity_decode($html, ENT_COMPAT, 'UTF-8');

     $file = tempnam(PATCHWORK_ZCACHE, 'converter');

     if (false === $file) {
         // No temporary file: there is nothing w3m could render. Drop the
         // anchors collected for this call so they do not leak into the next one.
         self::$textAnchor = array();

         return '';
     }

     Patchwork::writeFile($file, $html);

     // The column count is forced to an integer so that it can never inject
     // anything into the command line; the file name is shell-escaped.
     $command = 'w3m -dump -cols ' . (int) $this->cols . ' -T text/html -I UTF-8 -O UTF-8 ' . escapeshellarg($file);
     $text = shell_exec($command);

     // w3m is the only consumer of the file: remove it right away
     unlink($file);

     // shell_exec() does not return a string when the command fails or prints nothing
     if (!is_string($text)) {
         $text = '';
     }

     $text = str_replace(self::$charMap[0], self::$charMap[1], $text);

     if (self::$textAnchor) {
         $text = strtr($text, self::$textAnchor);
     }

     self::$textAnchor = array();

     return $text;
 }