/**
 * Extracts body text from an HTML string.
 *
 * Steps:
 *  1. Tokenize the input with HtmlParser and classify every token as an
 *     opening tag (1), a closing tag (0) or anything else (-1, kept as text).
 *  2. Run-length encode the classification into $endcode: a positive slot
 *     counts consecutive opening tags, a negative slot counts consecutive
 *     text tokens, and every closing tag gets a slot of its own holding 0.
 *  3. Score every pair (i, j) of text runs with i <= j using i_TagBefore(),
 *     i_fTagAfter() and i_fTextBetween(), and keep the best-scoring pair.
 *  4. Convert the end of the best pair into a token count and return that
 *     many text tokens, starting from the first one, joined by single spaces.
 *
 * @param mixed $string HTML source; anything loosely equal to '' is treated
 *                      as empty input.
 *
 * @return string The joined text tokens, '' for empty input, or the literal
 *                message 'Cannot read your data !' when the parser returns
 *                a falsy result.
 */
public function extractFromString($string)
{
    // Loose comparison on purpose: null, false and '' all count as empty.
    if ($string == '') {
        return '';
    }

    $o_HtmlParser = new HtmlParser();
    $a_DataParser = $o_HtmlParser->parser($string);
    if (!$a_DataParser) {
        return 'Cannot read your data !';
    }

    // Step 1: classify tokens; text tokens are also collected in $token.
    $binary_token = array();
    $token = array();
    foreach ($a_DataParser['tokenized_data'] as $index => $sz_tokenizedData) {
        if ($this->isTag($a_DataParser, $index)) {
            $binary_token[] = 1;
        } elseif ($this->isTagClose($a_DataParser, $index)) {
            $binary_token[] = 0;
        } else {
            $binary_token[] = -1;
            $token[] = $sz_tokenizedData;
        }
    }

    // Step 2: run-length encode the classification.
    $endcode = array();
    $j = 0;
    foreach ($binary_token as $x) {
        if ($x === 0) {
            // A closing tag always gets its own slot and then moves on to a
            // fresh one. The slot that was current is skipped even if it was
            // never written, so $endcode can contain gaps (e.g. slot 0 when
            // the first token is a closing tag).
            $j++;
            $endcode[$j] = 0;
            $j++;
            continue;
        }

        // A slot that has not been written yet counts as 0.
        $current = isset($endcode[$j]) ? $endcode[$j] : 0;
        if (abs($x + $current) < abs($current)) {
            // $x has the opposite sign of the current run: start a new run.
            $j++;
            $current = isset($endcode[$j]) ? $endcode[$j] : 0;
        }
        $endcode[$j] = $current + $x;
    }

    // Step 3: find the best-scoring pair of text runs.
    // NOTE(review): count() is the number of written slots, not the highest
    // index + 1, so when $endcode has gaps the trailing slots are never
    // visited. Kept as-is to preserve existing results - confirm intent.
    $slotCount = count($endcode);
    $max = 0;
    $j_max = 0;
    for ($i = 0; $i < $slotCount - 1; $i++) {
        // Only text runs (negative slots) can start a range; gaps are skipped.
        if (!isset($endcode[$i]) || $endcode[$i] >= 0) {
            continue;
        }

        for ($j = $i; $j < $slotCount; $j++) {
            // Only text runs can end a range.
            if (!isset($endcode[$j]) || $endcode[$j] >= 0) {
                continue;
            }

            // Presumably: tags before i + tags after j + text inside [i..j];
            // the helpers are defined elsewhere - verify against them.
            $S = $this->i_TagBefore($endcode, $i)
                + $this->i_fTagAfter($endcode, $j)
                + $this->i_fTextBetween($endcode, $i, $j);

            if ($S > $max) {
                $max = $S;
                $j_max = $j;
            }
        }
    }

    // Step 4: number of tokens represented by the slots before $j_max.
    // A zero slot (closing tag) or a gap counts as one token; any other slot
    // counts as the length of its run.
    $end = 0;
    for ($i = 0; $i < $j_max; $i++) {
        $run = isset($endcode[$i]) ? $endcode[$i] : 0;
        $end += ($run === 0) ? 1 : abs($run);
    }

    // Extraction starts at the first text token; only the end offset of the
    // best range is used.
    // NOTE(review): $end counts tags as well as text while $token holds text
    // tokens only, so $end can exceed count($token). Missing entries become
    // empty strings, which keeps the joined output (including its trailing
    // separators) stable - confirm whether clamping is wanted instead.
    $body_text = array();
    for ($i = 0; $i < $end; $i++) {
        $body_text[] = isset($token[$i]) ? $token[$i] : '';
    }

    return implode(' ', $body_text);
}