예제 #1
0
파일: Meta.php 프로젝트: yoozi/golem
 /**
  * {@inheritdoc}
  */
 public function parse()
 {
     // Collects Open Graph properties into $this->og and a fixed set of
     // <meta name="..."> values into $this->meta, then returns whatever the
     // parent implementation returns.
     if (!$this->dom) {
         // No document to inspect: hand back the metadata as it stands.
         return $this->meta;
     }

     foreach ($this->dom->getElementsByTagName('meta') as $tag) {
         // Open Graph: <meta property="og:xxx" content="..."> is stored under "xxx".
         if ($tag->hasAttribute('property')) {
             $property = strtolower(trim($tag->getAttribute('property')));
             if (strpos($property, 'og:') === 0 && ($key = substr($property, 3))) {
                 $this->og[$key] = $tag->getAttribute('content');
                 continue;
             }
         }

         // Standard metadata: only author, keywords and description are kept.
         if ($tag->hasAttribute('name')) {
             $name = strtolower(trim($tag->getAttribute('name')));
             if (in_array($name, array('author', 'keywords', 'description'), true)) {
                 $this->meta[$name] = $tag->getAttribute('content');
                 continue;
             }
         }

         // We'll try to fetch the cover image from microdata (@see schema.org),
         // if it is absent from the open graph tags. The value to compare is the
         // "itemprop" attribute itself (not "property"), and empty() avoids an
         // undefined-index notice when no image has been recorded yet.
         if (empty($this->og['image'])
             && $tag->hasAttribute('itemprop')
             && trim($tag->getAttribute('itemprop')) === 'image'
         ) {
             $this->og['image'] = $tag->getAttribute('content');
         }
     }

     return parent::parse();
 }
예제 #2
0
파일: Readability.php 프로젝트: yoozi/golem
 /**
  * {@inheritdoc}
  */
 public function parse()
 {
     // Without a source document there is nothing to extract.
     if (!$this->dom) {
         return $this->meta;
     }

     // getTopBox() is only consulted once a document is known to exist.
     $topBox = $this->getTopBox();
     if (!$topBox) {
         return $this->meta;
     }

     // Copy the selected node (deep) into a fresh, standalone document.
     $this->contentDom = new \DOMDocument('1.0');
     $importedBox = $this->contentDom->importNode($topBox, true);
     $this->contentDom->appendChild($importedBox);

     // Strip unwanted tags first, then unwanted attributes.
     foreach ($this->junkTags as $junkTag) {
         $this->removeJunkTag($junkTag);
     }
     foreach ($this->junkAttrs as $junkAttr) {
         $this->removeJunkAttr($junkAttr);
     }

     // Serialise the cleaned document and convert HTML entities to UTF-8 text.
     $html = $this->contentDom->saveHTML();
     $this->content = mb_convert_encoding($html, 'utf-8', "HTML-ENTITIES");

     return parent::parse();
 }
예제 #3
0
 public function parse($format, $data, $charset = 'UTF-8')
 {
     // Delegates to the parent parser only while the date found in the
     // document is still considered current; otherwise returns an empty array.
     //
     // @throws \RuntimeException when $format is not supported.
     if (!$this->isSupported($format)) {
         // The exception has to be thrown; returning it would hand the caller
         // an exception object as if it were a parse result.
         throw new \RuntimeException("Format {$format} is not supported.");
     }

     // The date text is read from the "id" attribute of the first node
     // matching the class-level date selector.
     $dateText = $this->getCrawler($data, $charset)->filter(static::$selectorDate)->first()->attr('id');

     $isCurrent = true;
     if (!empty($dateText) && is_string($dateText)) {
         // Remove a leading run of characters that are neither dots, digits
         // nor whitespace, together with the whitespace that follows it.
         $dateText = preg_replace('/^[^.0-9[:space:]]+[[:space:]]+/', '', $dateText);
         // The format has no "!" reset, so the parsed value carries the
         // current time of day; with the "> 24h" test below, a date is only
         // rejected once it is at least two calendar days old.
         $parsedDate = \DateTime::createFromFormat('j.n.Y', $dateText);
         if ($parsedDate !== false
             && (new \DateTime('today'))->getTimestamp() - $parsedDate->getTimestamp() > 24 * 60 * 60
         ) {
             $isCurrent = false;
         }
     }

     if (!$isCurrent) {
         return [];
     }

     return parent::parse($format, $data, $charset);
 }
예제 #4
0
파일: SUZ.php 프로젝트: tomkukral/obedar
 public function parse($format, $data, $charset = 'UTF-8')
 {
     // Pre-processes the raw input with a single regex substitution and then
     // delegates to the parent parser with the same format and charset.
     // NOTE(review): as written, the pattern and the replacement both appear
     // to be a plain space, which would make this a no-op. Presumably the
     // pattern was meant to match a non-breaking space (or "&nbsp;") - confirm
     // against the original file.
     $data = preg_replace('/ /', ' ', $data);
     return parent::parse($format, $data, $charset);
 }