/**
 * {@inheritdoc}
 *
 * Walks every <meta> element of the loaded document and collects:
 *  - Open Graph values (property="og:*") into $this->og, keyed by the part
 *    of the property name that follows the "og:" prefix;
 *  - the "author", "keywords" and "description" values (name="...") into
 *    $this->meta;
 *  - a fallback cover image from a microdata tag (itemprop="image") when no
 *    Open Graph image has been collected at that point of the scan.
 *
 * @return mixed $this->meta when no DOM is loaded, otherwise the result of
 *               parent::parse().
 */
public function parse()
{
    if (!$this->dom) {
        return $this->meta;
    }

    foreach ($this->dom->getElementsByTagName('meta') as $tag) {
        if ($tag->hasAttribute('property')) {
            $property = strtolower(trim($tag->getAttribute('property')));

            // A bare "og:" leaves an empty key after the prefix and is skipped.
            if (strpos($property, 'og:') === 0 && ($key = substr($property, 3))) {
                $this->og[$key] = $tag->getAttribute('content');
                continue;
            }
        }

        if ($tag->hasAttribute('name')) {
            $name = strtolower(trim($tag->getAttribute('name')));

            if (in_array($name, array('author', 'keywords', 'description'), true)) {
                $this->meta[$name] = $tag->getAttribute('content');
                continue;
            }
        }

        // We'll try to fetch the cover image from microdata (@see schema.org),
        // if it is absent from the open graph tags.
        // empty() is used because the 'image' key may not exist yet, and the
        // value compared is the itemprop attribute itself (microdata markup is
        // <meta itemprop="image" content="...">), not the "property" attribute.
        if (empty($this->og['image'])
            && $tag->hasAttribute('itemprop')
            && $tag->getAttribute('itemprop') === 'image'
        ) {
            $this->og['image'] = $tag->getAttribute('content');
        }
    }

    return parent::parse();
}
/**
 * {@inheritdoc}
 *
 * Extracts the main content node into its own document, strips the
 * configured junk tags and attributes from it, and stores the resulting
 * HTML in $this->content.
 *
 * @return mixed $this->meta when there is no DOM or getTopBox() yields
 *               nothing, otherwise the result of parent::parse().
 */
public function parse()
{
    if (!$this->dom) {
        return $this->meta;
    }

    $topBox = $this->getTopBox();
    if (!$topBox) {
        return $this->meta;
    }

    // Deep-copy the top box into a fresh document that holds only the content.
    $this->contentDom = new \DOMDocument('1.0');
    $importedBox = $this->contentDom->importNode($topBox, true);
    $this->contentDom->appendChild($importedBox);

    foreach ($this->junkTags as $junkTag) {
        $this->removeJunkTag($junkTag);
    }

    foreach ($this->junkAttrs as $junkAttr) {
        $this->removeJunkAttr($junkAttr);
    }

    // saveHTML() output is converted from HTML entities to utf-8 text.
    $html = $this->contentDom->saveHTML();
    $this->content = mb_convert_encoding($html, 'utf-8', "HTML-ENTITIES");

    return parent::parse();
}
/**
 * Parses the data only if the date found in it is recent enough.
 *
 * The date is read from the "id" attribute of the first node matching
 * static::$selectorDate. A leading token made of characters other than
 * digits, dots and whitespace (presumably a weekday name - confirm against
 * the real markup) is stripped, and the rest is parsed as "j.n.Y".
 * When that date lies more than 24 hours before today's midnight an empty
 * array is returned; in every other case, including a missing or unparsable
 * date, parsing is delegated to the parent.
 *
 * @param string $format  Format identifier, checked with isSupported().
 * @param mixed  $data    Raw data handed to getCrawler() and to the parent parser.
 * @param string $charset Charset of $data.
 *
 * @return mixed An empty array for outdated data, otherwise the result of
 *               parent::parse().
 *
 * @throws \RuntimeException When $format is not supported.
 */
public function parse($format, $data, $charset = 'UTF-8')
{
    if (!$this->isSupported($format)) {
        // The exception must be thrown; returning it would hand the caller an
        // exception object in place of a parse result.
        throw new \RuntimeException("Format {$format} is not supported.");
    }

    $date = $this->getCrawler($data, $charset)->filter(static::$selectorDate)->first()->attr('id');

    if (!empty($date) && is_string($date)) {
        $date = preg_replace('/^[^.0-9[:space:]]+[[:space:]]+/', '', $date);

        // Without a "!" or "|" in the format, createFromFormat() fills the
        // time-of-day fields with the current time.
        $parsed = \DateTime::createFromFormat('j.n.Y', $date);
        $midnightToday = (new \DateTime('today'))->getTimestamp();

        if ($parsed !== false && $midnightToday - $parsed->getTimestamp() > 24 * 60 * 60) {
            return [];
        }
    }

    return parent::parse($format, $data, $charset);
}
/**
 * Runs a regex substitution over the raw data, then delegates to the parent parser.
 *
 * NOTE(review): the pattern and replacement literals are reproduced as they
 * appear in this file; the pattern may originally contain a non-breaking
 * space rather than a plain one - confirm against the stored bytes.
 *
 * @param string $format  Format identifier, passed through unchanged.
 * @param mixed  $data    Raw data; the substituted copy is what the parent receives.
 * @param string $charset Charset of $data, passed through unchanged.
 *
 * @return mixed The result of parent::parse().
 */
public function parse($format, $data, $charset = 'UTF-8')
{
    return parent::parse($format, preg_replace('/ /', ' ', $data), $charset);
}