/**
 * Returns the simplified JSON representation of an article, cached in memcached.
 *
 * The cache key is built from the page id and self::SIMPLE_JSON_SCHEMA_VERSION.
 * On a cache miss the article is converted to its JSON format tree, simplified,
 * and the result is stored for self::SIMPLE_JSON_CACHE_EXPIRATION.
 *
 * @param \Article $article article to convert
 * @return mixed the cached value, or the freshly generated output of JsonFormatSimplifier::simplify()
 * @throws \Exception anything thrown by getJsonFormatForArticle() is rethrown unchanged
 */
public function getSimpleFormatForArticle(\Article $article) {
    $measurement = \Wikia\Measurements\Time::start([__CLASS__, __METHOD__]);

    $cacheKey = wfMemcKey("SimpleJson", $article->getPage()->getId(), self::SIMPLE_JSON_SCHEMA_VERSION);
    $jsonSimple = $this->app->wg->memc->get($cacheKey);

    if ($jsonSimple === false) {
        $articleTitle = $article->getTitle()->getText();

        /**
         * Prevention from circular references, when parsing articles with tabs.
         *
         * E.g. when a page contains a tab which is actually a link to itself,
         * or if any tab contains a tab which references the given page.
         *
         * @see DivContainingHeadersVisitor::parseTabview
         */
        \Wikia\JsonFormat\HtmlParser::markAsVisited($articleTitle);

        try {
            $jsonFormatRootNode = $this->getJsonFormatForArticle($article);
        } catch (\Exception $e) {
            // The visited list is static state: clear it even when parsing fails,
            // otherwise it would leak into any later parse in the same request.
            \Wikia\JsonFormat\HtmlParser::clearVisited();
            throw $e;
        }

        // We have finished parsing the article, so we can clean the list of visited articles
        \Wikia\JsonFormat\HtmlParser::clearVisited();

        $simplifier = new \Wikia\JsonFormat\JsonFormatSimplifier();
        $jsonSimple = $simplifier->simplify($jsonFormatRootNode, $articleTitle);

        $this->app->wg->memc->set($cacheKey, $jsonSimple, self::SIMPLE_JSON_CACHE_EXPIRATION);
    }

    $measurement->stop();

    return $jsonSimple;
}
/**
 * Traverses the HTML source of an article and generates its simplified JSON representation.
 *
 * The HTML is reduced to its DOM body, visited by an HtmlParser visitor that fills
 * a JsonFormatBuilder, and the resulting root node is simplified under the fixed
 * title 'test'.
 *
 * @param $html
 * @return array
 */
protected function getSimpleJson($html) {
    $domBody = $this->getDomBody($html);

    // The builder accumulates the JSON format tree while the visitor walks the DOM.
    $builder = new \JsonFormatBuilder();
    $parser = new \Wikia\JsonFormat\HtmlParser();
    $parser->createVisitor($builder)->visit($domBody);

    $rootNode = $builder->getJsonRoot();
    $simplifier = new Wikia\JsonFormat\JsonFormatSimplifier();

    return $simplifier->simplify($rootNode, 'test');
}
/**
 * Regression test for PLA-1343: a paragraph ending with a citation superscript
 * must simplify to plain text, without the "[1]" reference marker.
 */
public function testPrehistoricIceMan() {
    // PLA-1343
    $parser = new \Wikia\JsonFormat\HtmlParser();
    $simplifier = new Wikia\JsonFormat\JsonFormatSimplifier();

    $html = '<p><b>"Prehistoric Ice Man"</b> is the eighteenth and final episode of '
        . '<a href="/wiki/Season_Two" title="Season Two">Season Two</a>, and the 31st '
        . 'overall episode of <i>South Park</i>. It originally aired on January 20, 1999'
        . '<sup id="cite_ref-0" class="reference"><a href="#cite_note-0">[1]</a></sup>.</p>';
    $expectedText = '"Prehistoric Ice Man" is the eighteenth and final episode of Season Two, '
        . 'and the 31st overall episode of South Park. It originally aired on January 20, 1999.';

    $simplified = $simplifier->simplify($parser->parse($html), "Prehistoric Ice Man");

    // Only the first content element of the first section is inspected.
    $firstElement = $simplified['sections'][0]['content'][0];

    $this->assertEquals("paragraph", $firstElement['type']);
    $this->assertEquals($expectedText, $firstElement['text']);
}