/**
 * Static method to crawl a URL and, recursively, the links found on it
 *
 * Fetches the URL through a Spider, prints one status line, and records the
 * result: failures are appended to self::$errors (together with the page
 * that linked to them), successes are stored in self::$urls keyed by the
 * encoded URL. Every anchor of a successfully fetched page whose href
 * contains the page's domain is then crawled in turn. self::$depth is
 * raised to the deepest slash count seen.
 *
 * @param  string $url      URL to fetch
 * @param  array  $elements Passed through unchanged to the Spider constructor
 * @param  string $parent   URL of the page that linked to $url (recorded with errors)
 * @param  int    $start    Unix timestamp at which the crawl began
 * @param  int    $time     Time budget in seconds; once time() - $start exceeds
 *                          it, no further links are followed. The limit only
 *                          applies when both $start and $time are given.
 * @return void
 */
public static function crawl($url, $elements = null, $parent = null, $start = null, $time = null)
{
    // Percent-encode the URL while keeping its structural characters literal.
    // strtr() replaces in a single pass, so a sequence that was already
    // encoded in the input (e.g. "%2B" -> "%252B" -> "%2B") is restored
    // as-is rather than being decoded a second time into "+". The "&" is
    // kept literal as well, otherwise the parameters of a multi-parameter
    // query string would be merged into a single value.
    $url = strtr(rawurlencode($url), array(
        '%3A' => ':',
        '%2F' => '/',
        '%23' => '#',
        '%3F' => '?',
        '%3D' => '=',
        '%26' => '&',
        '%25' => '%',
        '%2B' => '+',
    ));

    // Remember the deepest level seen so far (the "- 2" presumably discounts
    // the two slashes of the "scheme://" prefix)
    $slashes = substr_count($url, '/') - 2;
    if ($slashes > self::$depth) {
        self::$depth = $slashes;
    }

    // Nothing to do if this URL (or its lower-cased form) was already crawled
    if (array_key_exists($url, self::$urls) || array_key_exists(strtolower($url), self::$urls)) {
        return;
    }

    $spider = new Spider($url, $elements);
    echo '-> (' . $spider->getCode() . ') ' . $url . PHP_EOL;

    if ($spider->isError()) {
        self::$errors[] = array('code' => $spider->getCode(), 'url' => $url, 'parent' => $parent);
        return;
    }

    self::$urls[$url] = $spider;

    // Reduce the base URL to its domain: strip the schema, then cut at the first slash
    $domain   = str_replace($spider->getSchema(), '', $spider->getBase());
    $slashPos = strpos($domain, '/');
    if ($slashPos !== false) {
        $domain = substr($domain, 0, $slashPos);
    }

    $links = $spider->getElements('a');
    if (null === $links) {
        return;
    }

    foreach ($links as $link) {
        // Once the time budget is spent no later link can qualify either,
        // so stop iterating instead of skipping them one by one
        if ((null !== $start) && (null !== $time) && ((time() - $start) > $time)) {
            break;
        }

        // Anchors without a usable href (missing key, null or empty) are ignored
        if (!isset($link['href']) || ('' === (string) $link['href'])) {
            continue;
        }

        $href      = $link['href'];
        $firstChar = substr($href, 0, 1);

        // Skip in-page fragments and query-only links
        if (('#' === $firstChar) || ('?' === $firstChar)) {
            continue;
        }

        // Only follow links whose href contains the current domain
        // (case-insensitive substring match)
        if (stripos($href, $domain) !== false) {
            self::crawl($href, $elements, $url, $start, $time);
        }
    }
}