Example #1
0
 /**
  * {@inheritdoc}
  *
  * When no DOM is available the current metadata is returned untouched.
  * Otherwise every metadata key is run through its matching
  * `'get' . studly_case($key)` mutator, if this object defines one, and the
  * description is stripped of tags when the `strip_tags` option is enabled.
  */
 public function parse()
 {
     // Nothing to extract from: hand back the metadata as it currently stands.
     if (!$this->dom) {
         return $this->meta;
     }

     foreach ($this->meta as $key => $value) {
         $mutator = 'get' . studly_case($key);

         if (method_exists($this, $mutator)) {
             // We will use a mutator to process an attribute value if
             // we found one, otherwise we'll do nothing.
             $this->meta[$key] = $this->{$mutator}();
         }
     }

     // !empty() keeps the original truthiness check but does not raise an
     // "undefined index" notice when the metadata has no description entry.
     if ($this->config->get('strip_tags') && !empty($this->meta['description'])) {
         $this->meta['description'] = strip_tags($this->meta['description']);
     }

     return $this->meta;
 }
Example #2
0
 /**
  * Fetch the given URL and seed this object with the basic metadata.
  *
  * The metadata is reset and then refilled with the `url`, `host`, `domain`
  * and `favicon` entries, each produced by the matching getter from the
  * request that was sent. The response body and the charset attribute of the
  * `Content-Type` header are stored only when the response is successful.
  *
  * @param  string                   $url    Address to fetch.
  * @param  HttpClientInterface|null $client Client handed to the browser; may be omitted.
  * @return $this
  */
 public function fromUrl($url, HttpClientInterface $client = null)
 {
     $http = new Browser($client);
     $reply = $http->get($url, $this->config->get('headers'));
     $sent = $http->getLastRequest();

     // Start from a clean slate, then derive each basic field from the request.
     $this->metadata = array();
     foreach (array('url', 'host', 'domain', 'favicon') as $field) {
         $getter = 'get' . studly_case($field);
         $this->metadata[$field] = $this->{$getter}($sent);
     }

     // A failed response leaves the document and charset untouched.
     if (!$reply->isSuccessful()) {
         return $this;
     }

     $this->document = $reply->getContent();
     $this->charset = $reply->getHeaderAttribute('Content-Type', 'charset');

     return $this;
 }
Example #3
0
 /**
  * Exercise the basic Config accessors: toArray(), toJson(), get() and set().
  */
 public function testBasicMethods()
 {
     $items = array(
         'parser' => 'hybrid',
         'hybrid' => array('primary' => 'readability', 'secendary' => 'meta'),
         'headers' => array('User-Agent' => 'Mozilla/5.0'),
         'strip_tags' => true,
     );
     $config = new Config($items);

     // assertEquals() takes the expected value first and the actual value
     // second, so that failure messages label the two sides correctly.
     $this->assertEquals($items, $config->toArray());
     $this->assertEquals(json_encode($items), $config->toJson());
     $this->assertEquals($items['parser'], $config->get('parser'));

     // A value written with set() must be readable back through get().
     $config->set('parser', 'Readability');
     $this->assertEquals('Readability', $config->get('parser'));
 }