Exemplo n.º 1
0
 /**
  * Select the parser used to extract content for the current URL.
  *
  * Rules are loaded for $this->url. When they contain a non-empty 'grabber'
  * section, each key of that section is used as a PCRE pattern and tested
  * against Url::getFullPath() for $this->url; the first match yields a
  * RuleParser built from the associated rule. The candidate parser is only
  * considered when there are no grabber rules at all and
  * $this->enableCandidateParser is truthy.
  *
  * @return ParserInterface|null the selected parser, or null when grabber
  *                              rules exist but none matches, or when the
  *                              candidate parser is disabled
  */
 public function getParser()
 {
     $ruleLoader = new RuleLoader($this->config);
     $rules = $ruleLoader->getRules($this->url);

     if (!empty($rules['grabber'])) {
         Logger::setMessage(get_called_class() . ': Parse content with rules');

         // The path depends only on $this->url, so resolve it once instead
         // of rebuilding the Url object for every pattern.
         $url = new Url($this->url);
         $sub_url = $url->getFullPath();

         foreach ($rules['grabber'] as $pattern => $rule) {
             if (preg_match($pattern, $sub_url)) {
                 Logger::setMessage(get_called_class() . ': Matched url ' . $sub_url);
                 return new RuleParser($this->html, $rule);
             }
         }

         // NOTE(review): grabber rules that do not match this URL do not
         // fall back to the candidate parser; this mirrors the original
         // if/elseif. Confirm this is intended.
         return null;
     }

     if ($this->enableCandidateParser) {
         Logger::setMessage(get_called_class() . ': Parse content with candidates');
         return new CandidateParser($this->html);
     }

     return null;
 }
Exemplo n.º 2
0
 /**
  * Apply the 'filter' rules defined for the current website to some content.
  *
  * Called after XML parsing. Rules are loaded for $this->website; every key
  * of the 'filter' section is used as a PCRE pattern and tested against
  * Url::getFullPath() for $this->website. Each matching entry provides a map
  * of search pattern => replacement that is applied, in order, with
  * preg_replace(). A replacement that fails is skipped so the content
  * produced so far is preserved.
  *
  * @param string $content the content that should be filtered
  *
  * @return string the filtered content (unchanged when no rule applies)
  */
 public function filterRules($content)
 {
     // the constructor should require a config, then this fallback can be removed
     $config = $this->config === null ? new Config() : $this->config;

     $loader = new RuleLoader($config);
     $rules = $loader->getRules($this->website);

     if (!isset($rules['filter'])) {
         return $content;
     }

     $url = new Url($this->website);
     $sub_url = $url->getFullPath();

     foreach ($rules['filter'] as $pattern => $rule) {
         if (!preg_match($pattern, $sub_url)) {
             continue;
         }

         foreach ($rule as $search => $replace) {
             $filtered = preg_replace($search, $replace, $content);

             // preg_replace() returns null on error (e.g. invalid pattern or
             // backtrack limit reached); keep the previous content instead
             // of discarding everything.
             if ($filtered !== null) {
                 $content = $filtered;
             }
         }
     }

     return $content;
 }