/**
 * Runs each extractor's XPath query against the DOM built from $text and
 * collects the references the extractors produce for the matched elements.
 *
 * An element is skipped when the extractor returns null for it, when the
 * extractor throws InvalidReferenceException (a debug line is logged), or
 * when the resulting reference's source title is a special page.
 *
 * @param ReferenceFactory $factory Handed through to each extractor's perform()
 * @param Extractor[] $extractors
 * @param string $text Content passed to Utils::createDOM()
 * @return Reference[]
 * @throws MWException When an extractor's XPath query yields a falsy result
 * @throws \Flow\Exception\WikitextException
 */
protected function extractReferences(ReferenceFactory $factory, array $extractors, $text)
{
    $document = Utils::createDOM($text);
    $found = array();
    $xpath = new DOMXPath($document);

    foreach ($extractors as $extractor) {
        $matches = $xpath->query($extractor->getXPath());
        if (!$matches) {
            $class = get_class($extractor);
            throw new MWException("Malformed xpath from {$class}: " . $extractor->getXPath());
        }

        foreach ($matches as $node) {
            try {
                $reference = $extractor->perform($factory, $node);
            } catch (InvalidReferenceException $e) {
                wfDebugLog('Flow', 'Invalid reference detected, skipping element');
                continue;
            }

            // Nothing to record when no reference was generated, or when
            // the reference points to a special page.
            if ($reference === null || $reference->getSrcTitle()->isSpecialPage()) {
                continue;
            }

            $found[] = $reference;
        }
    }

    return $found;
}
/**
 * Only extract templates to copy to Flow description.
 * Requires Parsoid, to reliably extract templates.
 *
 * The content is converted from wikitext to HTML, every node whose typeof
 * attribute is "mw:Transclusion" is serialized (one per line), and the
 * concatenated result is converted back to wikitext.
 *
 * @param string $content
 * @return string
 */
protected function extractTemplates($content)
{
    $html = Utils::convert('wikitext', 'html', $content, $this->title);
    $dom = Utils::createDOM($html);

    $finder = new \DOMXPath($dom);
    $transclusions = $finder->query('//*[@typeof="mw:Transclusion"]');

    // Serialize each matched node, newline-terminated, in query order.
    $templatesHtml = '';
    foreach ($transclusions as $node) {
        $templatesHtml .= $dom->saveHTML($node) . "\n";
    }

    return Utils::convert('html', 'wikitext', $templatesHtml, $this->title);
}
/**
 * Creates a DOM with extra considerations for BC with
 * previous parsoid content: content that does not already start with
 * a body tag is wrapped in one before being handed to Utils::createDOM().
 *
 * @param string $content HTML from parsoid
 * @return DOMDocument
 */
public static function createDOM($content)
{
    // The body tag is required, otherwise <meta> tags at the top are
    // magic'd into <head> rather than kept with the content.
    $startsWithBody = strncmp($content, '<body', 5) === 0;

    if (!$startsWithBody) {
        // BC: content currently comes from parsoid and is stored
        // wrapped in <body> tags, but prior to I0d9659f we were
        // storing only the contents and not the body tag itself.
        $content = "<body>{$content}</body>";
    }

    return Utils::createDOM($content);
}
/**
 * Asserts that Utils::createDOM() returns a DOMDocument for each data set.
 *
 * @dataProvider createDomProvider
 * @param string $message Message reported when the assertion fails
 * @param mixed $content Value from the data provider, passed to Utils::createDOM()
 */
public function testCreateDomErrorModes($message, $content)
{
    $dom = Utils::createDOM($content);

    $this->assertInstanceOf('DOMDocument', $dom, $message);
}