/**
 * Build an image candidate from the raw markup of an HTML tag.
 *
 * The image URL is taken from the tag's src/poster attribute (quoted or
 * unquoted) or, failing that, from any quoted string that looks like a .jpg
 * path. The URL and referer are handed to the parent constructor, after which
 * a series of cheap filters is applied. Only when every filter passes is
 * $this->candidate set to true; on any early return it is left untouched.
 *
 * NOTE(review): $this->url, $this->parsed_url, $this->parsed_referer,
 * $this->referer, $this->weight, $this->x and $this->y are presumably
 * initialised by the parent constructor; confirm against the parent class.
 *
 * @param string $imgtag  Raw tag markup; an empty value leaves the object uninitialised.
 * @param string $referer URL of the referring page, forwarded to the parent constructor.
 */
function __construct($imgtag = '', $referer = '') {
    if (!$imgtag) {
        return;
    }

    $this->tag = $imgtag;

    // "poster" also captures HTML5 video thumbnails.
    $quotedAttr = '/(?:src|poster) *=["\'](.+?)["\']/i';
    // Some sites don't use quotes: stop at any whitespace or at the closing
    // ">" so the end of the tag is never swallowed into the URL.
    $unquotedAttr = '/(?:src|poster) *=([^\\s>]+)/i';
    // Last resort: any quoted string that looks like a .jpg path, with an
    // optional http: or https: scheme.
    $quotedJpg = '/["\']((https?:){0,1}[\\.\\d\\w\\-\\/]+\\.jpg)["\']/i';

    if (preg_match($quotedAttr, $this->tag, $matches)
        || preg_match($unquotedAttr, $this->tag, $matches)) {
        // Avoid maps, headers and such
        if (preg_match('/usemap=|header/i', $this->tag)) {
            return;
        }
    } elseif (!preg_match($quotedJpg, $this->tag, $matches)) {
        return;
    }

    parent::__construct($matches[1], $referer);
    $this->type = 'local';

    // Skip implausibly short URLs and URLs already processed. !empty() avoids
    // the undefined-key notice/warning the first time a URL is seen.
    if (strlen($this->url) < 5 || !empty(WebThumb::$visited[$this->url])) {
        return;
    }
    WebThumb::$visited[$this->url] = true;

    // Avoid images generated by scripts with different IDs per page:
    // off-site (or referer-less) non-.jpg URLs carrying several query parameters.
    // Hosts are read defensively because either parsed URL may lack a host.
    $refererHost = isset($this->parsed_referer['host']) ? $this->parsed_referer['host'] : null;
    $imageHost = isset($this->parsed_url['host']) ? $this->parsed_url['host'] : null;
    if ((!$this->referer || $refererHost != $imageHost)
        && !preg_match('/\\.jpg/', $this->url)
        && preg_match('#/.+?\\?.+?&.+?&.+#', $this->url)) {
        return;
    }

    // Dimensions declared in the markup (attribute or inline style).
    if (preg_match('/[ "]width *[=:][ \'"]*(\\d+)/i', $this->tag, $match)) {
        $this->html_x = $this->x = intval($match[1]);
        $this->weight *= 1.5; // Give preference to images with img attributes
    }
    if (preg_match('/[ "]height *[=:][ \'"]*(\\d+)/i', $this->tag, $match)) {
        $this->html_y = $this->y = intval($match[1]);
        $this->weight *= 1.5; // Give preference to images with img attributes
    }

    // First filter to avoid downloading very small images
    if (($this->x > 0 && $this->x < 100) || ($this->y > 0 && $this->y < 100)) {
        return;
    }

    // URLs that look like ads, logos, spinners and similar chrome never become candidates.
    if (!preg_match('/loading|button|banner|\\Wads\\W|\\Wpub\\W|\\/ad\\/|\\/logo[\\/\\.]|header|rss|advertising/i', $this->url)) {
        $this->candidate = true;
    }
}