/**
 * Crawl one page and append every new, in-site link to the sitemap file.
 *
 * The page body is obtained through GetUrl() and split on "<a"; for each
 * piece the href attribute is extracted, its fragment removed, and relative
 * references are resolved against either the crawl root ($scanned[0]) or
 * Path($url). A link is written to $pf and crawled recursively when it
 *   - starts with the crawl root,
 *   - does not start with any prefix listed in $skip,
 *   - has not been scanned yet, and
 *   - contains the configured $extension string.
 *
 * Globals read: $pf (open file handle), $extension (string), $skip (array of
 * URL prefixes, optional), $freq, $priority. Global written: $scanned.
 *
 * NOTE(review): GetUrl() and Path() are defined elsewhere. This code assumes
 * GetUrl() returns the page body as a string and Path() returns the base URL
 * of $url ending in "/" -- confirm against their definitions.
 *
 * @param string $url Absolute URL of the page to crawl.
 * @return void
 */
function Scan($url)
{
    global $scanned, $pf, $extension, $skip, $freq, $priority;

    echo "scan url {$url}\n";
    array_push($scanned, $url);

    $html = GetUrl($url);
    if (!is_string($html) || $html === '') {
        // Nothing to parse (fetch failed or empty page).
        return;
    }

    // The first scanned URL is the crawl root; only links below it are kept.
    $root = $scanned[0];

    $segments = explode("<a", $html);
    // The first piece is the markup before the first "<a", never a tag.
    array_shift($segments);

    foreach ($segments as $segment) {
        // Look at the opening tag only (everything before the first ">").
        $tagParts = explode(">", $segment);
        $hrefSplit = explode("href=", $tagParts[0]);
        if (!isset($hrefSplit[1])) {
            // Tag without an href attribute (e.g. a named anchor).
            continue;
        }

        // The attribute value ends at the first space; drop any "#fragment".
        $valueParts = explode(" ", $hrefSplit[1]);
        $fragmentParts = explode("#", $valueParts[0]);
        $href = trim(str_replace("\"", "", $fragmentParts[0]), "'");
        if ($href === '') {
            // Empty or fragment-only link: it points at the current page.
            continue;
        }

        // Anything starting with "scheme:" (http, https, ftp, mailto,
        // javascript, tel, ...) is already absolute and must not be glued
        // onto the current path.
        if (!preg_match('/^[a-z][a-z0-9+.\-]*:/i', $href)) {
            if ($href[0] === '/') {
                $href = $root . $href;
            } else {
                $href = Path($url) . $href;
            }
        }

        // Stay inside the site being crawled.
        if (strncmp($href, $root, strlen($root)) !== 0) {
            continue;
        }

        $ignore = false;
        if (is_array($skip)) {
            foreach ($skip as $prefix) {
                if (strncmp($href, $prefix, strlen($prefix)) === 0) {
                    $ignore = true;
                    break;
                }
            }
        }
        if ($ignore || in_array($href, $scanned, true)) {
            continue;
        }

        if (!is_string($extension) || $extension === '' || strpos($href, $extension) === false) {
            continue;
        }

        // URLs may contain "&", "<" etc.; they must be entity-escaped to
        // keep the sitemap well-formed XML.
        $loc = htmlspecialchars($href, ENT_QUOTES | ENT_XML1, 'UTF-8');
        fwrite($pf, "<url>\n <loc>{$loc}</loc>\n" . " <changefreq>{$freq}</changefreq>\n" . " <priority>{$priority}</priority>\n</url>\n");
        echo $href . "\n";

        Scan($href);
    }
}
/**
 * Crawl one page and append every new, in-site link to the sitemap file.
 *
 * The page body is obtained through GetUrl() and split on "<a"; for each
 * piece the href attribute is extracted (via GetQuotedUrl() when the value
 * is double-quoted), its fragment removed, and relative references are
 * resolved against either the crawl root ($scanned[0]) or Path($url).
 * A link is written to $pf and crawled recursively when it
 *   - starts with the crawl root,
 *   - does not start with any prefix listed in $skip,
 *   - has not been scanned yet, and
 *   - contains at least one of the strings listed in $extension.
 *
 * The extension test had been commented out, which made the loop over
 * $extension write and crawl the same URL once per configured extension.
 * It is restored here as "match any extension, emit once".
 *
 * Globals read: $pf (open file handle), $extension (array of strings),
 * $skip (array of URL prefixes, optional), $freq, $priority.
 * Global written: $scanned.
 *
 * NOTE(review): GetUrl(), GetQuotedUrl(), Path() and the NL constant are
 * defined elsewhere. This code assumes GetUrl() returns the page body as a
 * string, GetQuotedUrl() returns the text between the leading double quote
 * and its closing quote, and Path() returns the base URL of $url ending in
 * "/" -- confirm against their definitions.
 *
 * @param string $url Absolute URL of the page to crawl.
 * @return void
 */
function Scan($url)
{
    global $scanned, $pf, $extension, $skip, $freq, $priority;

    echo $url . NL;
    array_push($scanned, $url);

    $html = GetUrl($url);
    if (!is_string($html) || $html === '') {
        // Nothing to parse (fetch failed or empty page).
        return;
    }

    // The first scanned URL is the crawl root; only links below it are kept.
    $root = $scanned[0];

    $segments = explode("<a", $html);
    // The first piece is the markup before the first "<a", never a tag.
    array_shift($segments);

    foreach ($segments as $segment) {
        // Look at the opening tag only (everything before the first ">").
        $tagParts = explode(">", $segment);
        $hrefSplit = explode("href=", $tagParts[0]);
        if (!isset($hrefSplit[1]) || $hrefSplit[1] === '') {
            // Tag without a usable href attribute.
            continue;
        }
        $hrefString = $hrefSplit[1];

        if ($hrefString[0] === '"') {
            // Quoted values may contain spaces, so they get their own parser.
            $nextUrl = (string) GetQuotedUrl($hrefString);
        } else {
            $spaceSplit = explode(" ", $hrefString);
            $nextUrl = trim(str_replace("\"", "", $spaceSplit[0]), "'");
        }

        // Drop any "#fragment"; it addresses a position, not another page.
        $fragmentSplit = explode("#", $nextUrl);
        $nextUrl = $fragmentSplit[0];
        if ($nextUrl === '') {
            // Empty or fragment-only link: it points at the current page.
            continue;
        }

        // Anything starting with "scheme:" (http, https, ftp, mailto,
        // javascript, tel, ...) is already absolute and must not be glued
        // onto the current path.
        if (!preg_match('/^[a-z][a-z0-9+.\-]*:/i', $nextUrl)) {
            if ($nextUrl[0] === '/') {
                $nextUrl = $root . $nextUrl;
            } else {
                $nextUrl = Path($url) . $nextUrl;
            }
        }

        // Stay inside the site being crawled.
        if (strncmp($nextUrl, $root, strlen($root)) !== 0) {
            continue;
        }

        $ignore = false;
        if (is_array($skip)) {
            foreach ($skip as $prefix) {
                if (strncmp($nextUrl, $prefix, strlen($prefix)) === 0) {
                    $ignore = true;
                    break;
                }
            }
        }
        if ($ignore || in_array($nextUrl, $scanned, true)) {
            continue;
        }

        // Accept the URL when it contains any of the configured extensions.
        $wanted = false;
        foreach ((array) $extension as $ext) {
            $ext = (string) $ext;
            if ($ext !== '' && strpos($nextUrl, $ext) !== false) {
                $wanted = true;
                break;
            }
        }
        if (!$wanted) {
            continue;
        }

        // XML only predefines &amp; &lt; &gt; &quot; &apos;, so escape with
        // htmlspecialchars(); htmlentities() would emit HTML-only entities.
        $loc = htmlspecialchars($nextUrl, ENT_QUOTES | ENT_XML1, 'UTF-8');
        fwrite($pf, " <url>\n" . " <loc>" . $loc . "</loc>\n" . " <changefreq>{$freq}</changefreq>\n" . " <priority>{$priority}</priority>\n" . " </url>\n");

        Scan($nextUrl);
    }
}