Example #1
0
 /**
  * Record the image referenced by one attribute of an <img> element.
  *
  * Nothing happens when the attribute is missing. A relative URL is always
  * recorded. An absolute URL is recorded only when $domain is not null and
  * the URL's own domain matches the pattern "ebayimg" (case-insensitive).
  * Recorded entries go to the 'images' key of the bag as an array with the
  * keys 'url', 'alt' and 'href'.
  *
  * @param \DOMElement  $img       Element to read the attribute from
  * @param string       $attribute Name of the attribute that holds the image URL
  * @param \DOMElement  $html      Not used by this method
  * @param Bag          $bag       Collector that receives the image entry
  * @param null|string  $domain    Only checked against null; its value is not compared
  */
 private static function addByAttribute($img, $attribute, \DOMElement $html, Bag $bag, $domain = null)
 {
     if (!$img->hasAttribute($attribute)) {
         return;
     }

     $imageUrl = new Url($img->getAttribute($attribute));

     //Absolute URLs must pass the filters below; relative ones go straight through
     if ($imageUrl->getDomain()) {
         //Without a reference domain no absolute image is accepted
         if ($domain === null) {
             return;
         }
         //Only images served from an "ebayimg" host are accepted
         if (!preg_match('~(ebayimg)~i', $imageUrl->getDomain())) {
             return;
         }
     }

     $bag->add('images', [
         'url' => $imageUrl->getUrl(),
         'alt' => self::_getAltTag($img),
         'href' => self::extractA($img, $bag->get('request_url')),
     ]);
 }
Example #2
0
 /**
  * Extract <img> elements and record them under the 'images' key of the bag.
  *
  * Each recorded entry is an array with the keys 'url', 'alt' and 'href'.
  * Images whose src has no domain (relative URLs) are always recorded.
  * Images with an absolute src are recorded only when $domain is given,
  * the src domain equals $domain, and the nearest enclosing <a> element
  * (if any) neither links to a different domain nor has a rel attribute
  * equal to "nofollow".
  *
  * @param \DOMElement $html
  * @param Bag         $bag
  * @param null|string $domain
  */
 protected static function extractImages(\DOMElement $html, Bag $bag, $domain = null)
 {
     foreach ($html->getElementsByTagName('img') as $img) {
         if (!$img->hasAttribute('src')) {
             continue;
         }
         $src = new Url($img->getAttribute('src'));
         //Relative sources skip the filters; absolute ones must pass all of them
         if ($src->getDomain()) {
             //Without a reference domain no absolute image is accepted
             if ($domain === null) {
                 continue;
             }
             //Avoid external images
             if ($src->getDomain() !== $domain) {
                 continue;
             }
             //Avoid images in external links: look for the nearest <a> ancestor
             $parent = $img->parentNode;
             while ($parent && isset($parent->tagName)) {
                 if ($parent->tagName === 'a') {
                     if ($parent->hasAttribute('href')) {
                         $href = new Url($parent->getAttribute('href'));
                         //Compare the link's own domain. The image domain is already
                         //known to equal $domain here, so testing it again could never
                         //reject anything. Relative links have no domain and are kept.
                         if ($href->getDomain() && $href->getDomain() !== $domain) {
                             continue 2;
                         }
                     }
                     if ($parent->hasAttribute('rel') && (string) $parent->getAttribute('rel') === 'nofollow') {
                         continue 2;
                     }
                     //Only the closest <a> matters
                     break;
                 }
                 $parent = $parent->parentNode;
             }
         }
         $bag->add('images', [
             'url' => $src->getUrl(),
             'alt' => $img->hasAttribute('alt') ? $img->getAttribute('alt') : '',
             'href' => self::extractA($img, $bag->get('request_url')),
         ]);
     }
 }
Example #3
0
 /**
  * Extract <img> elements and record their URLs under the 'images' key of the bag.
  *
  * Images whose src has no domain (relative URLs) are always recorded.
  * Images with an absolute src are recorded only when $domain is given,
  * the src domain equals $domain, and the nearest enclosing <a> element
  * (if any) neither links to a different domain nor has a rel attribute
  * equal to "nofollow".
  *
  * @param \DOMElement $html
  * @param Bag         $bag
  * @param null|string $domain
  */
 protected static function extractImages(\DOMElement $html, Bag $bag, $domain = null)
 {
     foreach ($html->getElementsByTagName('img') as $img) {
         if (!$img->hasAttribute('src')) {
             continue;
         }
         $src = new Url($img->getAttribute('src'));
         //Relative sources skip the filters; absolute ones must pass all of them
         if ($src->getDomain()) {
             //Without a reference domain no absolute image is accepted
             if ($domain === null) {
                 continue;
             }
             //Avoid external images
             if ($src->getDomain() !== $domain) {
                 continue;
             }
             //Avoid images in external links: look for the nearest <a> ancestor
             $parent = $img->parentNode;
             while ($parent && isset($parent->tagName)) {
                 if ($parent->tagName === 'a') {
                     if ($parent->hasAttribute('href')) {
                         $href = new Url($parent->getAttribute('href'));
                         //Compare the link's own domain. The image domain is already
                         //known to equal $domain here, so testing it again could never
                         //reject anything. Relative links have no domain and are kept.
                         if ($href->getDomain() && $href->getDomain() !== $domain) {
                             continue 2;
                         }
                     }
                     if ($parent->hasAttribute('rel') && (string) $parent->getAttribute('rel') === 'nofollow') {
                         continue 2;
                     }
                     //Only the closest <a> matters
                     break;
                 }
                 $parent = $parent->parentNode;
             }
         }
         $bag->add('images', $src->getUrl());
     }
 }
Example #4
0
 /**
  * Extract information from the <meta> elements.
  *
  * Every item returned by Utils::getMetas() is unpacked as (name, value,
  * element). Items with an empty value are ignored. A named meta is stored
  * under its lower-cased name, except "msapplication-tileimage", whose value
  * is appended to 'icons'. An unnamed meta is stored under the value of its
  * itemprop and/or http-equiv attribute when present.
  *
  * @param \DOMDocument $html
  * @param Bag          $bag
  */
 protected static function extractFromMeta(\DOMDocument $html, Bag $bag)
 {
     foreach (Utils::getMetas($html) as $meta) {
         list($name, $value, $element) = $meta;

         //Nothing to store
         if (!$value) {
             continue;
         }

         if ($name) {
             $key = strtolower($name);
             if ($key === 'msapplication-tileimage') {
                 //Tile images are collected as icons instead of a plain key
                 $bag->add('icons', $value);
             } else {
                 $bag->set($key, $value);
             }
             continue;
         }

         //Unnamed metas: fall back to the identifying attributes, in this order
         foreach (['itemprop', 'http-equiv'] as $attribute) {
             if ($element->hasAttribute($attribute)) {
                 $bag->set($element->getAttribute($attribute), $value);
             }
         }
     }
 }