/**
 * {@inheritdoc}
 *
 * Returns the metadata untouched when no DOM is available. Otherwise every
 * metadata key that has a matching `get<StudlyKey>` method on this object
 * is replaced by that method's result, and the description is optionally
 * stripped of HTML tags.
 *
 * @return array The (possibly updated) metadata array.
 */
public function parse()
{
    // Nothing to extract from when there is no DOM.
    if (!$this->dom) {
        return $this->meta;
    }

    foreach ($this->meta as $key => $value) {
        $mutator = 'get' . studly_case($key);

        // We will use a mutator to process an attribute value if
        // we found one, otherwise we'll do nothing.
        if (method_exists($this, $mutator)) {
            $this->meta[$key] = $this->{$mutator}();
        }
    }

    // `!empty()` keeps the original truthiness check but does not raise an
    // undefined-index notice when the metadata has no 'description' key.
    if ($this->config->get('strip_tags') && !empty($this->meta['description'])) {
        $this->meta['description'] = strip_tags($this->meta['description']);
    }

    return $this->meta;
}
/**
 * Fetch the given URL and fill in the basic metadata.
 *
 * The `url`, `host`, `domain` and `favicon` metadata entries are rebuilt
 * from the last request issued by the browser. The document body and its
 * charset are only stored when the response reports success; otherwise
 * they are left as they were.
 *
 * @param  string                   $url    Address to fetch.
 * @param  HttpClientInterface|null $client Optional HTTP client handed to the browser.
 * @return $this
 */
public function fromUrl($url, HttpClientInterface $client = null)
{
    $browser     = new Browser($client);
    $headers     = $this->config->get('headers');
    $response    = $browser->get($url, $headers);
    $lastRequest = $browser->getLastRequest();

    // Reset first, then fill key by key so each getter sees the entries
    // that were populated before it.
    $this->metadata = array();
    foreach (array('url', 'host', 'domain', 'favicon') as $field) {
        $getter = 'get' . studly_case($field);
        $this->metadata[$field] = $this->{$getter}($lastRequest);
    }

    // Unsuccessful responses contribute no document or charset.
    if (!$response->isSuccessful()) {
        return $this;
    }

    $this->document = $response->getContent();
    $this->charset  = $response->getHeaderAttribute('Content-Type', 'charset');

    return $this;
}
/**
 * Checks that Config exports its items as an array and as JSON, and that
 * get()/set() round-trip a value.
 */
public function testBasicMethods()
{
    $items = array(
        'parser'     => 'hybrid',
        'hybrid'     => array('primary' => 'readability', 'secendary' => 'meta'),
        'headers'    => array('User-Agent' => 'Mozilla/5.0'),
        'strip_tags' => true,
    );

    $config = new Config($items);

    // PHPUnit's signature is assertEquals($expected, $actual); passing the
    // expected value first keeps failure messages labelled correctly.
    $this->assertEquals($items, $config->toArray());
    $this->assertEquals(json_encode($items), $config->toJson());
    $this->assertEquals($items['parser'], $config->get('parser'));

    $config->set('parser', 'Readability');
    $this->assertEquals('Readability', $config->get('parser'));
}