/**
 * Get the parser.
 *
 * Loads the rules for the current URL and returns a RuleParser built from
 * the first "grabber" rule whose pattern matches the URL's full path.
 * When no grabber rule applies (none are defined, or none of them match),
 * the CandidateParser is returned instead, provided it is enabled.
 *
 * @return ParserInterface|null The selected parser, or null when no rule
 *                              matches and the candidate parser is disabled
 */
public function getParser()
{
    $ruleLoader = new RuleLoader($this->config);
    $rules = $ruleLoader->getRules($this->url);

    if (!empty($rules['grabber'])) {
        Logger::setMessage(get_called_class() . ': Parse content with rules');

        // The path is the same for every rule, so resolve it once up front.
        $url = new Url($this->url);
        $sub_url = $url->getFullPath();

        foreach ($rules['grabber'] as $pattern => $rule) {
            if (preg_match($pattern, $sub_url)) {
                Logger::setMessage(get_called_class() . ': Matched url ' . $sub_url);

                return new RuleParser($this->html, $rule);
            }
        }
    }

    // Fall back to the candidate parser both when no grabber rules exist and
    // when rules exist but none of their patterns matched this URL.
    if ($this->enableCandidateParser) {
        Logger::setMessage(get_called_class() . ': Parse content with candidates');

        return new CandidateParser($this->html);
    }

    return null;
}
/**
 * Called after XML parsing.
 *
 * Loads the rules for the website and, for every "filter" entry whose
 * pattern matches the website's full path, applies each search/replace
 * pair to the content with preg_replace(). A replacement that fails is
 * skipped so that the content accumulated so far is not lost.
 *
 * @param string $content the content that should be filtered
 *
 * @return string the filtered content (unchanged when no filter applies)
 */
public function filterRules($content)
{
    // the constructor should require a config, then this fallback can be removed
    $config = $this->config === null ? new Config() : $this->config;

    $loader = new RuleLoader($config);
    $rules = $loader->getRules($this->website);

    $url = new Url($this->website);
    $sub_url = $url->getFullPath();

    if (!isset($rules['filter'])) {
        return $content;
    }

    foreach ($rules['filter'] as $pattern => $rule) {
        if (!preg_match($pattern, $sub_url)) {
            continue;
        }

        foreach ($rule as $search => $replace) {
            $filtered = preg_replace($search, $replace, $content);

            // preg_replace() returns null on a PCRE error; keep the current
            // content instead of replacing it with null.
            if ($filtered !== null) {
                $content = $filtered;
            }
        }
    }

    return $content;
}