/**
 * Register the image referenced by one attribute of an element in the bag.
 *
 * Nothing happens when the attribute is absent. A URL without a domain
 * (i.e. a relative one) is always added. A URL with a domain is added only
 * when $domain is not null AND the URL's domain matches the hard-coded
 * pattern `ebayimg` (case-insensitive).
 *
 * @param mixed       $img       Element to inspect; presumably a \DOMElement,
 *                               since hasAttribute()/getAttribute() are called on it
 * @param string      $attribute Name of the attribute holding the image URL
 * @param \DOMElement $html      Not used by this method
 * @param Bag         $bag       Receives the entry under the 'images' key
 * @param null|string $domain    Only tested against null here; its value is
 *                               never compared with the image domain
 */
private static function addByAttribute($img, $attribute, \DOMElement $html, Bag $bag, $domain = null)
{
    if (!$img->hasAttribute($attribute)) {
        return;
    }

    $imageUrl = new Url($img->getAttribute($attribute));

    // Relative URLs (no domain) skip the filtering below and are always kept.
    if ($imageUrl->getDomain()) {
        // Without a reference domain, absolute URLs are never collected.
        if ($domain === null) {
            return;
        }

        // Absolute URLs are kept only when served from an "ebayimg" host.
        if (preg_match('~(ebayimg)~i', $imageUrl->getDomain()) !== 1) {
            return;
        }
    }

    $bag->add('images', [
        'url' => $imageUrl->getUrl(),
        'alt' => self::_getAltTag($img),
        'href' => self::extractA($img, $bag->get('request_url')),
    ]);
}
/**
 * Extract <img> elements
 *
 * Every <img> below $html that has a `src` attribute is considered:
 *  - a src without a domain (relative) is always added to the bag;
 *  - a src with a domain is added only when $domain is not null, the src
 *    domain is identical to $domain, and the nearest enclosing <a> element
 *    (if any) does not carry rel="nofollow".
 *
 * Each entry added under 'images' has the keys 'url', 'alt' and 'href'.
 *
 * @param \DOMElement $html   Root element whose <img> descendants are scanned
 * @param Bag         $bag    Receives the entries; also read for 'request_url'
 * @param null|string $domain Domain that absolute image URLs must match;
 *                            when null, absolute URLs are skipped
 */
protected static function extractImages(\DOMElement $html, Bag $bag, $domain = null)
{
    foreach ($html->getElementsByTagName('img') as $img) {
        if (!$img->hasAttribute('src')) {
            continue;
        }

        $src = new Url($img->getAttribute('src'));

        // Relative sources (no domain) bypass all the filtering below.
        if ($src->getDomain()) {
            // Avoid external images or images inside external links.
            if ($domain === null) {
                continue;
            }

            if ($src->getDomain() !== $domain) {
                continue;
            }

            // Walk up the ancestors until the closest <a> element is found.
            for ($node = $img->parentNode; $node && isset($node->tagName); $node = $node->parentNode) {
                if ($node->tagName !== 'a') {
                    continue;
                }

                if ($node->hasAttribute('href')) {
                    $target = $node->getAttribute('href');
                    $link = new Url($target);

                    // NOTE(review): the second operand repeats the src-domain check
                    // that already passed above, so this branch looks unreachable.
                    // Possibly the link's domain was meant to be compared - confirm.
                    if ($link->getDomain() && $src->getDomain() !== $domain) {
                        continue 2;
                    }
                }

                if ($node->hasAttribute('rel') && (string) $node->getAttribute('rel') === 'nofollow') {
                    continue 2;
                }

                // Only the nearest enclosing <a> is inspected.
                break;
            }
        }

        $bag->add('images', [
            'url' => $src->getUrl(),
            'alt' => $img->hasAttribute('alt') ? $img->getAttribute('alt') : '',
            'href' => self::extractA($img, $bag->get('request_url')),
        ]);
    }
}