/**
 * Converts an HTML fragment to plain text by rendering it with the external
 * `w3m` program.
 *
 * The markup is first rewritten so the text dump keeps some formatting hints
 * (bold as *text*, underline as _text_, links handed to buildTextAnchor()),
 * then written to a temporary file under PATCHWORK_ZCACHE and dumped by w3m
 * at a width of $this->cols columns.
 *
 * @param string $html HTML markup to convert.
 *
 * @return string The text produced by w3m after character and anchor
 *                substitutions; an empty string when w3m yields no output.
 */
function convertData($html)
{
    // Style according to the Netiquette
    $html = preg_replace('#<(?:b|strong)\\b[^>]*>(\\s*)#iu', '$1*', $html);
    $html = preg_replace('#(\\s*)</(?:b|strong)\\b[^>]*>#iu', '*$1', $html);
    $html = preg_replace('#<u\\b[^>]*>(\\s*)#iu', '$1_', $html);
    $html = preg_replace('#(\\s*)</u\\b[^>]*>#iu', '_$1', $html);

    // Remove <sub> and <sup> tags (they are turned into neutral <span> tags)
    $html = preg_replace('#<(/?)su[bp]\\b([^>]*)>#iu', '<$1span$2>', $html);

    // Fill empty alt attributes with whitespace, clear src attributes
    $html = preg_replace('#(<[^>]+\\balt=")"#iu', '$1 "', $html);
    $html = preg_replace('#(<[^>]+\\bsrc=")(?:[^"]*)"#iu', '$1"', $html);

    // Inline URLs
    $html = preg_replace_callback(
        '#<a\\b[^>]*\\shref="([^"]*)"[^>]*>(.*?)</a\\b[^>]*>#isu',
        array(__CLASS__, 'buildTextAnchor'),
        $html
    );

    // Convert html-entities to UTF-8 for w3m.
    // The four markup-significant characters are escaped one extra level
    // before html_entity_decode() runs, so that decoding every other entity
    // to UTF-8 does not change how they are written in the markup.
    // str_replace() applies the pairs in order, so the entity forms must be
    // listed before the raw characters.
    // NOTE(review): these literals were reconstructed from a copy in which
    // the entities had been decoded one level (leaving an unparsable ''');
    // confirm them against version control.
    $html = str_replace(
        array('&quot;', '&lt;', '&gt;', '&#039;', '"', '<', '>', "'"),
        array('&amp;quot;', '&amp;lt;', '&amp;gt;', '&amp;#039;', '&quot;', '&lt;', '&gt;', '&#039;'),
        FILTER::get($html, 'text')
    );
    $html = html_entity_decode($html, ENT_COMPAT, 'UTF-8');

    $file = tempnam(PATCHWORK_ZCACHE, 'converter');
    Patchwork::writeFile($file, $html);

    // Build the command from sanitized pieces only: an integer column count
    // and a shell-escaped file name.
    $cols = (int) $this->cols;
    $fileArg = escapeshellarg($file);
    $text = `w3m -dump -cols {$cols} -T text/html -I UTF-8 -O UTF-8 {$fileArg}`;

    // The temporary file is no longer needed once w3m has read it.
    unlink($file);

    // The backtick operator yields null when the command produces no output;
    // normalize to a string before post-processing.
    $text = str_replace(self::$charMap[0], self::$charMap[1], (string) $text);

    // Substitute the pairs collected in self::$textAnchor (presumably filled
    // by buildTextAnchor() during the "Inline URLs" step - confirm), then
    // reset the map so it does not carry over to the next conversion.
    $text = strtr($text, self::$textAnchor);
    self::$textAnchor = array();

    return $text;
}
/**
 * Renders the agent to HTML, tidies the markup, derives a plain-text
 * alternative from it and hands the message over to parent::send().
 *
 * When no 'Subject' header is set, the content of the document's <title>
 * element is used. The cleaned HTML is stored (by reference) in
 * $this->options['html'] and the text version in $this->options['text'].
 */
function send()
{
    $markup = p\Serverside::returnAgent($this->agent, $this->args, $this->lang);

    // Fall back to the document title when the caller gave no Subject
    if (!isset($this->headers['Subject'])) {
        if (preg_match("'<title[^>]*>(.*?)</title[^>]*>'isu", $markup, $found)) {
            $subject = html_entity_decode($found[1], ENT_COMPAT, 'UTF-8');
            $this->headers['Subject'] = trim($subject);
        }
    }

    // HTML cleanup: drop noisy elements, document-level tags and comments,
    // applying the patterns one after the other in this order
    $noise = array(
        '#<(head|script|title|applet|frameset|i?frame)\\b[^>]*>.*?</\\1\\b[^>]*>#is',
        '#</?(?:!doctype|html|meta|body|base|link)\\b[^>]*>#is',
        '#<!--.*?-->#s',
    );

    foreach ($noise as $pattern) {
        $markup = preg_replace($pattern, '', $markup);
    }

    $markup = trim($markup);

    // Clean up URLs in attributes
    $markup = preg_replace_callback(
        '/(\\s)(src|background|href)\\s*=\\s*(["\'])?((?(3).*?|[^\\s>]*))(?(3)\\3)/iu',
        array($this, 'cleanUrlAttribute'),
        $markup
    );

    // Embed images, only when the option is switched on
    $embedImages = !empty($this->options['embedImages']);

    if ($embedImages) {
        $markup = preg_replace_callback(
            '/(\\s)(src|background)="([^"]+\\.(jpe?g|png|gif))"/iu',
            array($this, 'addRawImage'),
            $markup
        );
    }

    // Bound by reference, exactly as before
    $this->options['html'] =& $markup;

    // HTML to text conversion, 78 columns wide
    $converter = new converter_txt_html(78);
    $this->options['text'] = $converter->convertData($markup);

    parent::send();
}