/**
 * Get the text that appears before the first heading.
 *
 * The HTML preceding the first heading tag is cut out, the elements matching
 * this instance's excluded and auxiliary selectors are removed from it, and
 * whatever is left is reduced to trimmed plain text.
 *
 * @param string $text HTML to inspect
 * @return string|null Plain text preceding the first heading, or null when the
 *  input has no heading or nothing is left in front of it after filtering
 */
private function extractHeadingBeforeFirstHeading($text) {
    $matches = [];
    // Only attribute-less heading tags (<h1> .. <h6>) are recognised here.
    if (!preg_match('/<h[123456]>/', $text, $matches, PREG_OFFSET_CAPTURE)) {
        // There isn't a first heading so we interpret this as the article
        // being entirely without heading.
        return null;
    }

    // PREG_OFFSET_CAPTURE reports byte offsets, so the byte-wise substr() is the
    // matching tool here. The cast normalises the `false` older PHP versions may
    // return for an empty slice.
    $text = (string)substr($text, 0, $matches[0][1]);
    // Compare against '' explicitly: a bare truthiness check would also reject
    // the perfectly valid text "0".
    if ($text === '') {
        // There isn't any text before the first heading so we declare there isn't
        // a first heading.
        return null;
    }

    $formatter = new HtmlFormatter($text);
    $formatter->remove($this->excludedElementSelectors);
    $formatter->remove($this->auxiliaryElementSelectors);
    $formatter->filterContent();

    $text = trim(Sanitizer::stripAllTags($formatter->getText()));
    if ($text === '') {
        // There isn't any text after filtering before the first heading so we declare
        // that there isn't a first heading.
        return null;
    }

    return $text;
}