Beispiel #1
0
 /**
  * Look for a "single page" version of the article and fetch it.
  *
  * The site config built for $url may define "single_page_link" XPath
  * expressions. They are evaluated in order against the DOM built from $html
  * and the first usable (non-empty) result is taken as the single page URL.
  * That URL is resolved against $url and fetched, unless it is identical to
  * $url.
  *
  * @param string $html HTML in which the single page link is searched
  * @param string $url  URL used to build the site config and to resolve relative links
  *
  * @return false|array Response from httpClient fetch, or false when no single page could be retrieved
  */
 private function getSinglePage($html, $url)
 {
     $this->logger->log('debug', 'Looking for site config files to see if single page link exists');
     $siteConfig = $this->configBuilder->buildFromUrl($url);

     // no single page found?
     if (empty($siteConfig->single_page_link)) {
         $this->logger->log('debug', 'No "single_page_link" config found');

         return false;
     }

     // Build DOM tree from HTML
     $readability = new Readability($html, $url);
     $xpath = new \DOMXPath($readability->dom);

     // Loop through single_page_link xpath expressions until one yields a usable url
     $singlePageUrl = null;
     foreach ($siteConfig->single_page_link as $pattern) {
         $elems = $xpath->evaluate($pattern, $readability->dom);

         if (is_string($elems)) {
             // the expression evaluated to a string: it is the url itself
             $candidate = trim($elems);
             if ($candidate) {
                 $singlePageUrl = $candidate;
                 break;
             }

             // empty result: give the remaining patterns a chance instead of giving up
             continue;
         }

         if (!($elems instanceof \DOMNodeList)) {
             // neither a string nor a node list: nothing to extract from this pattern
             continue;
         }

         foreach ($elems as $item) {
             if ($item instanceof \DOMElement && $item->hasAttribute('href')) {
                 $candidate = $item->getAttribute('href');
             } elseif ($item instanceof \DOMAttr) {
                 $candidate = $item->value;
             } else {
                 continue;
             }

             // same truthiness test as the "No url found" check below, so a value
             // that would be rejected there never stops the search early
             if ($candidate) {
                 $singlePageUrl = $candidate;
                 break 2;
             }
         }
     }

     if (!$singlePageUrl) {
         $this->logger->log('debug', 'No url found');

         return false;
     }

     // try to resolve against $url
     $singlePageUrl = $this->makeAbsoluteStr($url, $singlePageUrl);

     // check it's not what we have already!
     if (false !== $singlePageUrl && $singlePageUrl != $url) {
         // it's not, so let's try to fetch it...
         $response = $this->httpClient->fetch($singlePageUrl, false, $siteConfig->http_header);

         // any status below 300 is treated as a usable response
         if ($response['status'] < 300) {
             $this->logger->log('debug', 'Single page content found with url', ['url' => $singlePageUrl]);

             return $response;
         }
     }

     $this->logger->log('debug', 'No content found with url', ['url' => $singlePageUrl]);

     return false;
 }
Beispiel #2
0
 /**
  * Building a config for a host, adding it to the cache under its cache key
  * and building it again must yield an equal SiteConfig.
  */
 public function testBuildWithCachedVersion()
 {
     $configBuilder = new ConfigBuilder(array('site_config' => array(__DIR__ . '/../fixtures/site_config')));

     // first build
     $res = $configBuilder->build('fr.wikipedia.org');
     $this->assertInstanceOf('Graby\\SiteConfig\\SiteConfig', $res);

     // put the config in the cache, then build again for the same host
     $configBuilder->addToCache($res->cache_key, $res);
     $res2 = $configBuilder->build('fr.wikipedia.org');

     // check the second result itself, not the first one again
     $this->assertInstanceOf('Graby\\SiteConfig\\SiteConfig', $res2);
     $this->assertEquals($res, $res2, 'Config retrieve from cache');
 }