/**
 * Checks RuleLoader::getRules() against one URL that is expected to
 * produce rules and one that is expected to produce none.
 */
public function testGetRules()
 {
     // This URL is expected to resolve to a non-empty rule set.
     $matchingLoader = new RuleLoader(new Config());
     $matchingRules = $matchingLoader->getRules('http://www.egscomics.com/index.php?id=1690');
     $this->assertNotEmpty($matchingRules);

     // A fresh loader is used so the second lookup does not share the first instance.
     $unmatchedLoader = new RuleLoader(new Config());
     $unmatchedRules = $unmatchedLoader->getRules('http://localhost/foobar');
     $this->assertEmpty($unmatchedRules);
 }
Example #2
0
 /**
  * Get the parser.
  *
  * Loads the rules for the current URL. When grabber rules exist, the first
  * pattern that matches the URL's full path yields a RuleParser built from
  * that rule. In every other case (no grabber rules, or no pattern matched)
  * a CandidateParser is returned; within this method the
  * enableCandidateParser flag only decides whether a log message is written.
  *
  * @return ParserInterface
  */
 public function getParser()
 {
     $ruleLoader = new RuleLoader($this->config);
     $rules = $ruleLoader->getRules($this->url);
     if (!empty($rules['grabber'])) {
         Logger::setMessage(get_called_class() . ': Parse content with rules');
         // The URL is the same for every pattern, so resolve its path once
         // instead of rebuilding the Url object on each iteration.
         $url = new Url($this->url);
         $sub_url = $url->getFullPath();
         foreach ($rules['grabber'] as $pattern => $rule) {
             if (preg_match($pattern, $sub_url)) {
                 Logger::setMessage(get_called_class() . ': Matched url ' . $sub_url);
                 return new RuleParser($this->html, $rule);
             }
         }
     } elseif ($this->enableCandidateParser) {
         Logger::setMessage(get_called_class() . ': Parse content with candidates');
     }
     // Fallback for both "no rules" and "rules present but none matched".
     return new CandidateParser($this->html);
 }
Example #3
0
 /**
  * Called after XML parsing.
  *
  * Loads the rules for the website and, for every 'filter' pattern that
  * matches the website's full path, applies each search => replace regex
  * pair to the content in order.
  *
  * @param string $content the content that should be filtered
  *
  * @return string the filtered content; unchanged when no filter rule applies
  */
 public function filterRules($content)
 {
     // the constructor should require a config, then this fallback can be removed
     $config = $this->config === null ? new Config() : $this->config;
     $loader = new RuleLoader($config);
     $rules = $loader->getRules($this->website);
     if (!isset($rules['filter'])) {
         return $content;
     }
     $url = new Url($this->website);
     $sub_url = $url->getFullPath();
     foreach ($rules['filter'] as $pattern => $rule) {
         if (!preg_match($pattern, $sub_url)) {
             continue;
         }
         foreach ($rule as $search => $replace) {
             $filtered = preg_replace($search, $replace, $content);
             // preg_replace() returns null on a PCRE error (e.g. a broken
             // pattern or backtrack limit); keep the previous content rather
             // than discarding everything because of one bad rule.
             if ($filtered !== null) {
                 $content = $filtered;
             }
         }
     }
     return $content;
 }