/**
 * Returns the simplified JSON representation of an article, cached in memcache.
 *
 * The cache key is built from the page id and self::SIMPLE_JSON_SCHEMA_VERSION.
 * When the cache lookup yields exactly false, the article is parsed via
 * getJsonFormatForArticle(), simplified with JsonFormatSimplifier and the result
 * is stored using self::SIMPLE_JSON_CACHE_EXPIRATION.
 *
 * @param \Article $article article to convert
 * @return mixed value produced by JsonFormatSimplifier::simplify(), or the cached copy of it
 * @throws \Exception anything thrown by getJsonFormatForArticle() is rethrown after the
 *                    list of visited articles has been cleared
 */
public function getSimpleFormatForArticle(\Article $article) {
	$measurement = \Wikia\Measurements\Time::start([__CLASS__, __METHOD__]);

	$cacheKey = wfMemcKey("SimpleJson", $article->getPage()->getId(), self::SIMPLE_JSON_SCHEMA_VERSION);
	$jsonSimple = $this->app->wg->memc->get($cacheKey);

	if ($jsonSimple === false) {
		$articleTitle = $article->getTitle()->getText();

		/**
		 * Prevention from circular references, when parsing articles with tabs.
		 *
		 * E.g. when the page contains a tab which is actually a link to itself,
		 * or if any tab contains a tab which references the given page.
		 *
		 * @see DivContainingHeadersVisitor::parseTabview
		 */
		\Wikia\JsonFormat\HtmlParser::markAsVisited($articleTitle);

		try {
			$jsonFormatRootNode = $this->getJsonFormatForArticle($article);
		} catch (\Exception $e) {
			// The visited list is static state: do not let a failed parse leave this
			// article marked as visited for whatever is parsed next in this request.
			\Wikia\JsonFormat\HtmlParser::clearVisited();
			throw $e;
		}

		// We have finished parsing the article, so we can clean the array of visited articles
		\Wikia\JsonFormat\HtmlParser::clearVisited();

		// Fully qualified, consistent with the other \Wikia\... references in this method
		$simplifier = new \Wikia\JsonFormat\JsonFormatSimplifier();
		$jsonSimple = $simplifier->simplify($jsonFormatRootNode, $articleTitle);

		$this->app->wg->memc->set($cacheKey, $jsonSimple, self::SIMPLE_JSON_CACHE_EXPIRATION);
	}

	$measurement->stop();

	return $jsonSimple;
}
/**
 * Handles a div produced by <tabview> (tabs loaded by ajax).
 *
 * The expected markup looks like this:
 *
 * <div id="flytabs_0">
 *   <ul>
 *     <li class="selected" data-tab="flytabs_00">
 *       <a href="/wiki/New_Moon?action=render">
 *         <span>New Moon</span>
 *       </a>
 *     </li>
 *     <li class="" data-tab="flytabs_01">
 *       <a href="/wiki/Eclipse?action=render">
 *         <span>Eclipse</span>
 *       </a>
 *     </li>
 *   </ul>
 * </div>
 *
 * Every link found below the given node is resolved to an article; each article
 * that has not been visited yet is marked as visited, parsed into a section,
 * level-adjusted and added to the JSON format builder. Links that do not resolve
 * to an article, or that point to an already visited article (circular
 * reference), are skipped.
 *
 * @param DOMNode $currentNode the tabview container node
 */
protected function parseTabview(DOMNode $currentNode) {
	$domXPath = new DOMXPath($currentNode->ownerDocument);
	$anchors = $domXPath->query(".//a", $currentNode);
	// One parser instance is shared by all tabs of this container
	$parser = new Wikia\JsonFormat\HtmlParser();

	foreach ($anchors as $anchor) {
		$href = $domXPath->query('./@href', $anchor)->item(0);
		$sectionTitle = $this->getTabTitle($domXPath, $anchor);
		$tabArticle = $this->getArticleByUrl($href);

		if (empty($tabArticle)) {
			continue;
		}

		$articleTitle = $tabArticle->getTitle()->getText();

		// Only descend into articles we have not seen yet (guards against cycles)
		if (!\Wikia\JsonFormat\HtmlParser::isVisited($articleTitle)) {
			\Wikia\JsonFormat\HtmlParser::markAsVisited($articleTitle);

			$section = $this->parseArticleToSection($tabArticle, $parser, $sectionTitle);
			$this->adjustLevel($section);
			$this->getJsonFormatBuilder()->add($section);
		}
	}
}
/**
 * Two nested styled wrapper divs around a link: the parsed root is expected to
 * contain exactly one child, a link node whose text is "foo".
 */
public function testAmericanDadWrapper() {
	$outerDiv = '<div style="clear:both; width:100%; border:2px solid #1E90FF; background-color:#E3F2FF">';
	$innerDiv = '<div style="padding: 1em 1.5em; min-height:500px;"><a>foo</div></div>';

	$parser = new HtmlParser();
	$root = $parser->parse($outerDiv . $innerDiv);

	$this->assertEquals('root', $root->getType());

	$children = $root->getChildren();
	$this->assertEquals(1, count($children), 'wrong number of children');

	$link = $children[0];
	$this->assertEquals('link', $link->getType());
	$this->assertEquals('foo', $link->getText());
}