/**
  * Save the plain data into a file named after the result's hash.
  * 
  * Note: Overwrites old files and uses existing subdirectories.
  * 
  * @param  Result $result
  * 
  * @return Result
  */
 public function execute(Result $result)
 {
     // Target layout: <dir>/<first character of hash>/<hash>.
     $target = implode(DIRECTORY_SEPARATOR, array(
         $this->dir,
         substr($result->getHash(), 0, 1),
         $result->getHash(),
     ));

     // Write the result's data to the computed path.
     $filesystem = new Filesystem();
     $filesystem->dumpFile($target, $result->getData());

     // The result is handed back untouched.
     return $result;
 }
Example #2
0
 /**
  * Return text that matches a XPath expression.
  *
  * @param  Result $result
  *
  * @return Result
  */
 public function execute(Result $result)
 {
     // Load the result's data into a crawler so it can be queried with XPath.
     $domCrawler = new DOMCrawler();
     $domCrawler->addContent($result->getData());

     // Print the trimmed text of every node matching the configured
     // expression, one per line.
     $domCrawler->filterXPath($this->xpath)->each(function (DOMCrawler $node) {
         $text = trim($node->text());
         // Compare against '' explicitly: empty() also treats the string "0"
         // as empty, which would silently drop a node whose text is "0".
         if ($text !== '') {
             echo $text . PHP_EOL;
         }
     });

     // The result is handed back untouched.
     return $result;
 }
Example #3
0
 /**
  * Save the plain data into a file whose path is derived from the result's link.
  * 
  * Note: Overwrites old files and uses existing subdirectories.
  * 
  * @param  Result $result
  * 
  * @return Result
  */
 public function execute(Result $result)
 {
     // Remove the http(s) scheme and a leading "www."/"wwwN." label from the link.
     // NOTE(review): the remainder is used as a relative path as-is; confirm
     // that links containing ".." segments cannot reach this point.
     $relativePath = preg_replace('/^((https?):\\/\\/)?(www\\d{0,3}\\.)?/', '', $result->getLink());

     // A link such as blog.com/posts would be written as a file and then
     // block blog.com/posts/a, which needs a directory of the same name.
     // Appending an underscore to the *index* file avoids that clash.
     if ($this->isPrettyUrl($relativePath)) {
         $relativePath .= '_';
     }

     // Write the result's data below the configured base directory.
     $filesystem = new Filesystem();
     $filesystem->dumpFile($this->dir . DIRECTORY_SEPARATOR . $relativePath, $result->getData());

     // The result is handed back untouched.
     return $result;
 }
Example #4
0
 /**
  * Minify HTML.
  * 
  * @param  Result $result
  * 
  * @return Result
  */
 public function execute(Result $result)
 {
     // Request the minifier's advanced optimization level.
     $options = array('optimizationLevel' => HTMLMinify::OPTIMIZATION_ADVANCED);

     // Minify the current data and store the output back on the result.
     $minified = HTMLMinify::minify($result->getData(), $options);
     $result->setData($minified);

     return $result;
 }