/**
 * Convert the HTML held in $this->html into plain text.
 *
 * The result is stored in $this->text and $this->converted is set to true.
 * When links were collected in $this->linkList during the conversion, a
 * numbered "Links:" section is appended to the text.
 */
protected function convert()
{
    // Start with an empty link list for this run.
    $this->linkList = array();

    // Clean the string from non-UTF8 chars, remove the UTF8-BOM
    // and normalize whitespace.
    $text = UTF8::clean($this->html, true, true, false);
    $text = UTF8::trim(stripslashes($text));

    // NOTE(review): the return value is ignored and $text is used afterwards,
    // so converter() presumably receives $text by reference and fills
    // $this->linkList as a side effect - confirm against its definition.
    $this->converter($text);

    // Append the numbered link list (1-based), if any link was collected.
    if (count($this->linkList) > 0) {
        $text .= "\n\nLinks:\n------\n";
        foreach ($this->linkList as $i => $url) {
            $text .= '[' . ($i + 1) . '] ' . $url . "\n";
        }
    }

    // Normalize whitespace, again.
    $text = UTF8::normalize_whitespace($text);

    // Don't use tabs. A literal replacement needs no regular expression.
    $text = str_replace("\t", ' ', $text);

    // Trim every line. The callback is addressed via __CLASS__ because the
    // 'self' string form of a callable is deprecated as of PHP 8.2.
    $textArray = explode("\n", $text);
    array_walk($textArray, array(__CLASS__, 'trimCallback'));
    $text = implode("\n", $textArray);

    // Remove leading/ending empty lines.
    $text = UTF8::trim($text, "\n");

    $this->text = $text;
    $this->converted = true;
}
/**
 * Convert HTML into Text.
 *
 * Reads $this->html, runs it through the conversion steps below and stores
 * the result in $this->text; $this->converted is set to true afterwards.
 * When $this->options['width'] is greater than zero the final text is
 * word-wrapped to that width.
 */
protected function convert()
{
    // Start with an empty link list for this run.
    $this->linkList = array();

    // Patterns (keys) and replacements (values) that are applied near the end.
    $endSearchReplaceArrayKeys = array_keys($this->endSearchReplaceArray);
    $endSearchReplaceArrayValues = array_values($this->endSearchReplaceArray);

    // Clean the string from non-UTF8 chars & remove UTF8-BOM & normalize whitespace.
    $text = UTF8::clean($this->html, true, true, false);

    // NOTE(review): the return value is ignored and $text is used afterwards,
    // so converter() presumably receives $text by reference and fills
    // $this->linkList as a side effect - confirm against its definition.
    $this->converter($text);

    // Normalize whitespace, once again.
    $text = UTF8::normalize_whitespace($text);

    // Add the link-list, if needed. The '[[_html2text_links]]' marker is
    // presumably turned into the list heading by the end replacements below
    // - TODO confirm against $this->endSearchReplaceArray.
    if (count($this->linkList) > 0) {
        $text .= '[[_html2text_links]]';
        foreach ($this->linkList as $i => $url) {
            $text .= '[' . ($i + 1) . '] ' . $url . "\n";
        }
    }

    // Trim every line. The callback is addressed via __CLASS__ because the
    // 'self' string form of a callable is deprecated as of PHP 8.2.
    $textArray = explode("\n", $text);
    array_walk($textArray, array(__CLASS__, 'trimCallback'));
    $text = implode("\n", $textArray);

    // Convert "space"-replacer into space. This runs after the per-line trim,
    // so spaces inserted here are not trimmed by it.
    $text = str_replace('|+|_html2text_space|+|', ' ', $text);

    // Replace some placeholder at the end.
    $text = preg_replace($endSearchReplaceArrayKeys, $endSearchReplaceArrayValues, $text);

    // Normalise empty lines: whitespace-only lines become empty, and runs of
    // three or more newlines collapse to exactly two.
    $text = preg_replace("/\n\\s+\n/", "\n\n", $text);
    $text = preg_replace("/[\n]{3,}/", "\n\n", $text);

    // If max length of line is defined, then use "wordwrap".
    if ($this->options['width'] > 0) {
        $text = UTF8::wordwrap($text, $this->options['width']);
    }

    // Remove leading/ending empty lines/spaces.
    $text = trim($text);

    $this->text = $text;
    $this->converted = true;
}