Example #1
0
 /**
  * Returns the simplified JSON representation of an article, served from
  * the cache ($this->app->wg->memc) whenever possible.
  *
  * The cache key is built from the page id and SIMPLE_JSON_SCHEMA_VERSION.
  * A strict `false` from the cache is treated as a miss; the article is then
  * parsed, simplified and stored for SIMPLE_JSON_CACHE_EXPIRATION.
  *
  * @param \Article $article article to convert
  * @return mixed the cached value, or the result of JsonFormatSimplifier::simplify()
  */
 public function getSimpleFormatForArticle(\Article $article)
 {
     $measurement = \Wikia\Measurements\Time::start([__CLASS__, __METHOD__]);
     $cacheKey = wfMemcKey("SimpleJson", $article->getPage()->getId(), self::SIMPLE_JSON_SCHEMA_VERSION);
     $jsonSimple = $this->app->wg->memc->get($cacheKey);
     if ($jsonSimple === false) {
         $titleText = $article->getTitle()->getText();
         // Guard against circular references when parsing articles with tabs,
         // e.g. a page whose tab links back to itself, or a tab that contains
         // a tab referencing this page.
         // @see DivContainingHeadersVisitor::parseTabview
         \Wikia\JsonFormat\HtmlParser::markAsVisited($titleText);
         try {
             $jsonFormatRootNode = $this->getJsonFormatForArticle($article);
         } finally {
             // Always reset the visited list, even when parsing throws, so a
             // failed parse cannot leave stale entries for later calls.
             \Wikia\JsonFormat\HtmlParser::clearVisited();
         }
         $simplifier = new \Wikia\JsonFormat\JsonFormatSimplifier();
         $jsonSimple = $simplifier->simplify($jsonFormatRootNode, $titleText);
         $this->app->wg->memc->set($cacheKey, $jsonSimple, self::SIMPLE_JSON_CACHE_EXPIRATION);
     }
     $measurement->stop();
     return $jsonSimple;
 }
 /**
  * Handles a <tabview> container, i.e. a div whose tabs are loaded by ajax.
  *
  * Expected markup:
  *
  * <div id="flytabs_0">
  *      <ul>
  *          <li class="selected" data-tab="flytabs_00">
  *              <a href="/wiki/New_Moon?action=render">
  *                  <span>New Moon</span>
  *              </a>
  *          </li>
  *          <li class="" data-tab="flytabs_01">
  *              <a href="/wiki/Eclipse?action=render">
  *                  <span>Eclipse</span>
  *              </a>
  *          </li>
  *      </ul>
  * </div>
  *
  * Every <a> found below the given node is resolved to an article. Each
  * article that has not been visited yet is marked as visited, parsed into a
  * section and added to the JSON format builder. Links that resolve to no
  * article, or to an already visited one, are skipped, which prevents
  * circular references between tabs.
  *
  * @param DOMNode $currentNode the tabview container node
  */
 protected function parseTabview(DOMNode $currentNode)
 {
     $finder = new DOMXPath($currentNode->ownerDocument);
     $anchors = $finder->query(".//a", $currentNode);
     $parser = new Wikia\JsonFormat\HtmlParser();
     foreach ($anchors as $anchor) {
         // item(0) yields the href attribute node (not a plain string), or
         // null when the anchor has no href.
         $hrefAttr = $finder->query('./@href', $anchor)->item(0);
         $label = $this->getTabTitle($finder, $anchor);
         $linkedArticle = $this->getArticleByUrl($hrefAttr);
         if (!empty($linkedArticle)) {
             $pageName = $linkedArticle->getTitle()->getText();
             // Only descend into pages not seen yet, to avoid cyclic references
             if (!\Wikia\JsonFormat\HtmlParser::isVisited($pageName)) {
                 \Wikia\JsonFormat\HtmlParser::markAsVisited($pageName);
                 $section = $this->parseArticleToSection($linkedArticle, $parser, $label);
                 $this->adjustLevel($section);
                 $this->getJsonFormatBuilder()->add($section);
             }
         }
     }
 }
Example #3
0
 /**
  * A link wrapped in two styled <div> containers must come out as the only
  * child of the root node. Note that the <a> tag in the fixture has no
  * closing tag.
  */
 public function testAmericanDadWrapper()
 {
     $parser = new HtmlParser();
     $outerDiv = '<div style="clear:both; width:100%; border:2px solid #1E90FF; background-color:#E3F2FF">';
     $innerDiv = '<div style="padding: 1em 1.5em; min-height:500px;"><a>foo</div></div>';
     $root = $parser->parse($outerDiv . $innerDiv);

     $this->assertEquals('root', $root->getType());

     $children = $root->getChildren();
     $this->assertEquals(1, count($children), 'wrong number of children');

     $onlyChild = $children[0];
     $this->assertEquals('link', $onlyChild->getType());
     $this->assertEquals('foo', $onlyChild->getText());
 }