Exemple #1
0
 /**
  * Turns an HTML fragment into plain text by dumping it through the external `w3m` program.
  *
  * The markup is first rewritten with a series of regular expressions, anchors are
  * handed to self::buildTextAnchor(), and the result is written to a temporary file
  * under PATCHWORK_ZCACHE that `w3m -dump` reads. The dump is then post-processed with
  * self::$charMap and self::$textAnchor, and self::$textAnchor is reset to an empty array.
  *
  * @param string $html HTML markup to convert.
  *
  * @return string The text produced by w3m after post-processing.
  */
 function convertData($html)
 {
     // Ordered pattern => replacement table; preg_replace() applies the entries one after another.
     $rewrites = array(
         // Netiquette-style emphasis: <b>/<strong> become *...*, <u> becomes _..._
         '#<(?:b|strong)\\b[^>]*>(\\s*)#iu' => '$1*',
         '#(\\s*)</(?:b|strong)\\b[^>]*>#iu' => '*$1',
         '#<u\\b[^>]*>(\\s*)#iu' => '$1_',
         '#(\\s*)</u\\b[^>]*>#iu' => '_$1',
         // <sub> and <sup> are turned into plain <span> elements
         '#<(/?)su[bp]\\b([^>]*)>#iu' => '<$1span$2>',
         // An empty alt attribute receives a single space, src attributes are emptied
         '#(<[^>]+\\balt=")"#iu' => '$1 "',
         '#(<[^>]+\\bsrc=")(?:[^"]*)"#iu' => '$1"',
     );
     $html = preg_replace(array_keys($rewrites), array_values($rewrites), $html);

     // Anchors are replaced through the buildTextAnchor callback
     $html = preg_replace_callback(
         '#<a\\b[^>]*\\shref="([^"]*)"[^>]*>(.*?)</a\\b[^>]*>#isu',
         array(__CLASS__, 'buildTextAnchor'),
         $html
     );

     // Entity handling for w3m: the four entities listed first get their ampersand
     // escaped once more and the raw characters become entities, so that the
     // html_entity_decode() call below undoes exactly one level of escaping
     // while decoding every other entity to UTF-8.
     $entitySearch = array('&quot;', '&lt;', '&gt;', '&#039;', '"', '<', '>', "'");
     $entityReplace = array('&amp;quot;', '&amp;lt;', '&amp;gt;', '&amp;#039;', '&quot;', '&lt;', '&gt;', '&#039;');
     $filtered = FILTER::get($html, 'text');
     $html = html_entity_decode(str_replace($entitySearch, $entityReplace, $filtered), ENT_COMPAT, 'UTF-8');

     // w3m reads its input from a temporary file
     $file = tempnam(PATCHWORK_ZCACHE, 'converter');
     Patchwork::writeFile($file, $html);
     $quotedFile = escapeshellarg($file);
     $dump = `w3m -dump -cols {$this->cols} -T text/html -I UTF-8 -O UTF-8 {$quotedFile}`;
     unlink($file);

     // Post-process the dump: character map first, then the collected anchor substitutions
     $text = strtr(str_replace(self::$charMap[0], self::$charMap[1], $dump), self::$textAnchor);
     self::$textAnchor = array();

     return $text;
 }
Exemple #2
0
 /**
  * Builds the HTML body from the agent, cleans it up, derives a text version and sends.
  *
  * The HTML is obtained from p\Serverside::returnAgent(). When no 'Subject' header is
  * set, the content of the <title> element (if any) is used. The cleaned HTML is stored
  * by reference in $this->options['html'], its text rendering (78 columns, produced by
  * converter_txt_html) in $this->options['text'], and parent::send() is then called.
  */
 function send()
 {
     $html = p\Serverside::returnAgent($this->agent, $this->args, $this->lang);

     // Use the document title as subject when none was provided
     if (!isset($this->headers['Subject'])) {
         if (preg_match("'<title[^>]*>(.*?)</title[^>]*>'isu", $html, $titleMatch)) {
             $this->headers['Subject'] = trim(html_entity_decode($titleMatch[1], ENT_COMPAT, 'UTF-8'));
         }
     }

     // HTML cleanup: the patterns are applied in order, each match being removed
     $noise = array(
         // Elements dropped together with their content
         '#<(head|script|title|applet|frameset|i?frame)\\b[^>]*>.*?</\\1\\b[^>]*>#is',
         // Structural tags dropped while their content is kept
         '#</?(?:!doctype|html|meta|body|base|link)\\b[^>]*>#is',
         // Comments
         '#<!--.*?-->#s',
     );
     $html = trim(preg_replace($noise, '', $html));

     // src, background and href attribute values are passed to cleanUrlAttribute()
     $html = preg_replace_callback(
         '/(\\s)(src|background|href)\\s*=\\s*(["\'])?((?(3).*?|[^\\s>]*))(?(3)\\3)/iu',
         array($this, 'cleanUrlAttribute'),
         $html
     );

     // Optional embedding: double-quoted jpeg/png/gif references are passed to addRawImage()
     $embedImages = !empty($this->options['embedImages']);
     if ($embedImages) {
         $html = preg_replace_callback(
             '/(\\s)(src|background)="([^"]+\\.(jpe?g|png|gif))"/iu',
             array($this, 'addRawImage'),
             $html
         );
     }

     $this->options['html'] =& $html;

     // Plain-text alternative, wrapped at 78 columns
     $converter = new converter_txt_html(78);
     $this->options['text'] = $converter->convertData($html);

     parent::send();
 }