/**
 * Splits an HTML fragment into top-level chunks and turns the identifiable
 * ones into edit page notices.
 *
 * The fragment is cut at every outermost <div>/<table> element. Each such
 * element becomes one chunk tagged with the first id or class value found in
 * its opening tag; any non-blank text between (or around) those elements
 * becomes a chunk without an identifier. Nested <div>/<table> elements stay
 * part of their outermost ancestor. Closing tags are paired by nesting depth
 * only; the tag name of a closing tag is not compared with its opener.
 *
 * For every chunk with an identifier an EditPageNotice is built; notices
 * with a non-empty summary are added to $this->notices and returned.
 *
 * @param string    $html HTML fragment to analyse.
 * @param bool|null $more Set to true (by reference) when at least one chunk
 *                        did not result in a notice. Never reset to false
 *                        here, so a value passed in by the caller survives.
 * @return array EditPageNotice objects created from $html, in document order.
 */
protected function analyzeHtml($html, &$more = null) {
    // The \b after the tag name prevents look-alike elements such as
    // <divider> or <tablet> from being counted as section boundaries.
    $tags = array();
    preg_match_all("/<(\\/?)(div|table)\\b[^>]*>/isU", $html, $tags, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);

    $chunks = array();
    $pos = 0;            // byte offset up to which $html has been consumed
    $depth = 0;          // current <div>/<table> nesting level
    $sectionStart = 0;   // offset of the opening tag of the current section
    $sectionTag = '';    // text of the opening tag of the current section

    foreach ($tags as $tag) {
        $tagText = $tag[0][0];
        $tagStart = $tag[0][1];
        $isClosing = $tag[1][0] !== '';

        if (!$isClosing) {
            if ($depth === 0) {
                // A new outermost section starts: flush the loose text before it.
                // Compare against '' rather than using empty(), which would
                // also discard a remnant consisting of the string "0".
                $remnant = trim(substr($html, $pos, $tagStart - $pos));
                if ($remnant !== '') {
                    $chunks[] = array('content' => $remnant, 'id' => false);
                }
                $pos = $tagStart;
                $sectionStart = $tagStart;
                $sectionTag = $tagText;
            }
            $depth++;
            continue;
        }

        if ($depth === 0) {
            // Stray closing tag outside of any section: ignore it.
            continue;
        }

        $depth--;
        if ($depth > 0) {
            // Still inside the outermost section.
            continue;
        }

        // The outermost section just closed; record it together with the
        // first id/class value of its opening tag (false when there is none).
        $tagEnd = $tagStart + strlen($tagText);
        $id = false;
        if (preg_match("/(?:id|class)=[\"']?([^\"'> ]+)[\"'>\t ]/isU", $sectionTag, $matches)) {
            $id = $matches[1];
        }
        $chunks[] = array(
            'content' => trim(substr($html, $sectionStart, $tagEnd - $sectionStart)),
            'id' => $id,
        );
        $pos = $tagEnd;
    }

    // Whatever follows the last closed section (including the body of a
    // section that was never closed) is loose content without an identifier.
    $remnant = trim(substr($html, $pos));
    if ($remnant !== '') {
        $chunks[] = array('content' => $remnant, 'id' => false);
    }

    $notices = array();
    foreach ($chunks as $chunk) {
        if (!$chunk['id']) {
            continue;
        }
        $notice = new EditPageNotice($chunk['content'], $chunk['id']);
        // Loose comparison kept on purpose so that a null summary counts as empty too.
        if ($notice->getSummary() != '') {
            $this->notices->add($notice);
            $notices[] = $notice;
        }
    }

    // Some part of the input is not represented by the returned notices.
    if (count($notices) < count($chunks)) {
        $more = true;
    }

    return $notices;
}