Пример #1
0
 /**
  * Returns the simplified JSON representation of an article, cached in memcached.
  *
  * The cache key is built from the page id and SIMPLE_JSON_SCHEMA_VERSION. A cached value of
  * exactly `false` is treated as a miss; in that case the article is converted to the JSON-format
  * node tree, simplified, and stored for SIMPLE_JSON_CACHE_EXPIRATION.
  *
  * @param \Article $article article to convert
  * @return array simplified structure as returned by JsonFormatSimplifier::simplify(),
  *               or the previously cached copy of it
  * @throws \Exception anything raised while building the node tree is rethrown unchanged,
  *                    after the list of visited articles has been reset
  */
 public function getSimpleFormatForArticle(\Article $article)
 {
     $measurement = \Wikia\Measurements\Time::start([__CLASS__, __METHOD__]);
     $cacheKey = wfMemcKey("SimpleJson", $article->getPage()->getId(), self::SIMPLE_JSON_SCHEMA_VERSION);
     $jsonSimple = $this->app->wg->memc->get($cacheKey);
     if ($jsonSimple === false) {
         /**
          * Guard against circular references when parsing articles with tabs.
          *
          * E.g. when a page contains a tab that is actually a link to itself,
          * or when any tab contains a tab that refers back to the given page.
          *
          * @see DivContainingHeadersVisitor::parseTabview
          */
         \Wikia\JsonFormat\HtmlParser::markAsVisited($article->getTitle()->getText());
         try {
             $jsonFormatRootNode = $this->getJsonFormatForArticle($article);
         } catch (\Exception $e) {
             // The visited list is static state: reset it on failure as well, otherwise a
             // later parse in the same request would still see this article as visited.
             // NOTE(review): catch + rethrow is used instead of `finally` so the minimum PHP
             // version is not raised; switch to `finally` if PHP 5.5+ is guaranteed.
             \Wikia\JsonFormat\HtmlParser::clearVisited();
             throw $e;
         }
         // Parsing of the article is finished, so the list of visited articles can be cleared.
         \Wikia\JsonFormat\HtmlParser::clearVisited();
         // Fully qualified, like the HtmlParser references above, so that name resolution
         // does not depend on the namespace this file is declared in.
         $simplifier = new \Wikia\JsonFormat\JsonFormatSimplifier();
         $jsonSimple = $simplifier->simplify($jsonFormatRootNode, $article->getTitle()->getText());
         $this->app->wg->memc->set($cacheKey, $jsonSimple, self::SIMPLE_JSON_CACHE_EXPIRATION);
     }
     $measurement->stop();
     return $jsonSimple;
 }
 /**
  * Traverses the HTML source of an article and produces its simplified JSON representation.
  *
  * The HTML is turned into a DOM body via getDomBody(), walked by a visitor created from
  * HtmlParser that fills a JsonFormatBuilder, and the resulting root node is simplified.
  * The literal 'test' is passed to simplify() in the position where other callers pass
  * the article title.
  *
  * @param $html HTML source, handed unchanged to getDomBody()
  * @return array
  */
 protected function getSimpleJson($html)
 {
     $domBody = $this->getDomBody($html);

     // The builder accumulates the JSON-format tree while the visitor walks the DOM.
     $builder = new \JsonFormatBuilder();
     $parser = new \Wikia\JsonFormat\HtmlParser();
     $parser->createVisitor($builder)->visit($domBody);

     $rootNode = $builder->getJsonRoot();

     return (new Wikia\JsonFormat\JsonFormatSimplifier())->simplify($rootNode, 'test');
 }
Пример #3
0
 /**
  * Ticket PLA-1343.
  *
  * Parses a paragraph containing bold, link, italic and citation (<sup class="reference">)
  * markup and checks that the first element of the first section is a paragraph whose
  * text has the inline tags flattened and the "[1]" citation marker left out.
  */
 public function testPrehistoricIceMan()
 {
     // Arrange
     $parser = new \Wikia\JsonFormat\HtmlParser();
     $simplifier = new Wikia\JsonFormat\JsonFormatSimplifier();
     $markup = '<p><b>"Prehistoric Ice Man"</b> is the eighteenth and final episode of '
         . '<a href="/wiki/Season_Two" title="Season Two">Season Two</a>, and the 31st '
         . 'overall episode of <i>South Park</i>. It originally aired on January 20, 1999'
         . '<sup id="cite_ref-0" class="reference"><a href="#cite_note-0">[1]</a></sup>.</p>';
     $expectedText = '"Prehistoric Ice Man" is the eighteenth and final episode of Season Two, '
         . 'and the 31st overall episode of South Park. It originally aired on January 20, 1999.';

     // Act
     $simplified = $simplifier->simplify($parser->parse($markup), "Prehistoric Ice Man");

     // Assert
     $firstElement = $simplified['sections'][0]['content'][0];
     $this->assertEquals("paragraph", $firstElement['type']);
     $this->assertEquals($expectedText, $firstElement['text']);
 }