示例#1
0
文件: submit.php 项目: rabuzarus/dir
/**
 * Decodes the hex-encoded profile URL from the `url` query parameter, pushes
 * it to the sync targets and runs the submit sequence for it.
 *
 * The request is always terminated at the end of this function. Missing,
 * empty or malformed (non-hex / odd-length) input is ignored instead of being
 * forwarded as `false` or an empty string.
 *
 * @param mixed $a Passed by reference; not used by this function.
 * @return void
 */
function submit_content(&$a)
{
    //Fetch the raw (still hex-encoded) value without triggering an undefined index notice.
    $hex = '';
    if (isset($_GET['url']) && is_string($_GET['url'])) {
        $hex = notags(trim($_GET['url']));
    }

    //hex2bin() warns and returns false on odd-length or non-hex input, so validate first.
    $url = false;
    if ($hex !== '' && strlen($hex) % 2 === 0 && ctype_xdigit($hex)) {
        $url = hex2bin($hex);
    }

    if ($url !== false && $url !== '') {
        //Currently we simply push RAW URL's to our targets.
        sync_push($url);
        //Run the submit sequence.
        run_submit($url);
    }

    exit;
}
示例#2
0
文件: admin.php 项目: rabuzarus/dir
/**
 * Handles the POST actions of the admin page.
 *
 * Three actions are supported, checked in this order:
 *  - `submit_url`: redirect to the submit page with the hex-encoded URL.
 *  - `batch_submit`: take up to $perBatch URLs from the import file, submit
 *    each of them, write the remainder back and emit an auto-submitting form
 *    to continue with the next batch. Progress counters live in $_SESSION.
 *  - `dir_import_url` (+ `dir_page`): fetch one page of profiles from the
 *    remote `/lsearch` endpoint, append the not-yet-known and healthy profile
 *    URLs to the import file and emit an auto-submitting form for the next page.
 *
 * @param object $a Application object; get_baseurl() and
 *                  config['site-health']['skip_import_threshold'] are read from it.
 * @return void
 */
function admin_post(&$a)
{
    //Submit a profile URL.
    if (!empty($_POST['submit_url'])) {
        goaway($a->get_baseurl() . '/submit?url=' . bin2hex($_POST['submit_url']));
    }

    //Get our input (without undefined index notices for absent fields).
    $url = isset($_POST['dir_import_url']) ? $_POST['dir_import_url'] : '';
    $page = isset($_POST['dir_page']) ? intval($_POST['dir_page']) : 0;
    $batch = !empty($_POST['batch_submit']);

    //Directory
    $file = realpath(__DIR__ . '/..') . '/.htimport';

    //Per batch setting.
    $perPage = 200;
    $perBatch = 2;

    //Tells whether the site hosting the given profile URL scores below the import threshold.
    //Only called after include/site-health.php has been loaded.
    $isUnhealthy = function ($profileUrl) use ($a) {
        $site = parse_site_from_url($profileUrl);
        $r = q("SELECT * FROM `site-health` WHERE `base_url`= '%s' ORDER BY `id` ASC LIMIT 1", dbesc($site));
        return is_array($r) && count($r)
            && intval($r[0]['health_score']) < $a->config['site-health']['skip_import_threshold'];
    };

    if ($batch) {
        require_once 'include/submit.php';
        require_once 'include/site-health.php';

        //First get all data from file, dropping empty lines (the file ends with a line break).
        $list = array();
        $data = is_file($file) ? file_get_contents($file) : false;
        if ($data !== false) {
            foreach (explode("\r\n", $data) as $line) {
                if ($line !== '') {
                    $list[] = $line;
                }
            }
        }

        //Fresh batch?
        if (!isset($_SESSION['import_progress'])) {
            $_SESSION['import_progress'] = true;
            $_SESSION['import_success'] = 0;
            $_SESSION['import_failed'] = 0;
            $_SESSION['import_total'] = 0;
            notice("Started new batch. ");
        }

        //Turn reported PHP errors into exceptions so the try/catch below covers them too.
        set_error_handler(function ($errno, $errstr = '') {
            if ((error_reporting() & $errno) == 0) {
                return;
            }
            throw new \Exception($errstr, $errno);
        });

        for ($i = 0; $i < $perBatch; $i++) {
            $url = array_shift($list);
            if (!$url) {
                break;
            }

            set_time_limit(15);

            //Every item counts as failed up front; this is only reverted when
            //run_submit() reports success, so skipped sites stay counted as failed.
            $_SESSION['import_total']++;
            $_SESSION['import_failed']++;

            try {
                //A site may well turn 'sour' during the import.
                //Check the health again for this reason.
                if ($isUnhealthy($url)) {
                    continue;
                }
                //Do the submit if health is ok.
                if (run_submit($url)) {
                    $_SESSION['import_failed']--;
                    $_SESSION['import_success']++;
                }
            } catch (\Exception $ex) {
                /* We tried... */
            }
        }

        //The throwing handler is only meant for the guarded loop above.
        restore_error_handler();

        $left = count($list);
        $success = $_SESSION['import_success'];
        $errors = $_SESSION['import_failed'];

        if ($left > 0) {
            notice("{$left} items left in batch...<br>{$success} updated profiles.<br>{$errors} import errors.");
            file_put_contents($file, implode("\r\n", $list));
            //Auto-submitting form that triggers the next batch.
            $fid = uniqid('autosubmit_');
            echo '<form method="POST" id="' . $fid . '"><input type="hidden" name="batch_submit" value="1"></form>' . '<script type="text/javascript">setTimeout(function(){ document.getElementById("' . $fid . '").submit(); }, 300);</script>';
        } else {
            notice("Completed batch! {$success} updated. {$errors} errors.");
            if (is_file($file)) {
                unlink($file);
            }
            unset($_SESSION['import_progress']);
        }
        return;
    }

    if ($url) {
        require_once 'include/site-health.php';

        $result = fetch_url($url . "/lsearch?p={$page}&n={$perPage}&search=.*");
        $data = $result ? json_decode($result) : false;

        //Only continue with a decoded object that carries the fields used below.
        if (is_object($data) && isset($data->results, $data->total)) {
            $rows = '';
            foreach ($data->results as $profile) {
                //Skip known profiles.
                $purl = $profile->url;
                $nurl = str_replace(array('https:', '//www.'), array('http:', '//'), $purl);
                $r = q("SELECT count(*) as `matched` FROM `profile` WHERE (`homepage` = '%s' OR `nurl` = '%s') LIMIT 1", dbesc($purl), dbesc($nurl));
                if (is_array($r) && count($r) && $r[0]['matched']) {
                    continue;
                }
                //Skip profiles hosted on sites below the health threshold.
                if ($isUnhealthy($purl)) {
                    continue;
                }
                $rows .= $purl . "\r\n";
            }

            //The first page overwrites the import file, later pages append to it.
            file_put_contents($file, $rows, $page > 0 ? FILE_APPEND : 0);

            $total = intval($data->total);
            $progress = min(($page + 1) * $perPage, $total);
            notice("Imported " . $progress . "/" . $total . " URLs.");

            if ($progress < $total) {
                //Auto-submitting form that requests the next page; the URL is user input, so escape it.
                $fid = uniqid('autosubmit_');
                $safeUrl = htmlspecialchars($url, ENT_QUOTES, 'UTF-8');
                echo '<form method="POST" id="' . $fid . '">' . '<input type="hidden" name="dir_import_url" value="' . $safeUrl . '">' . '<input type="hidden" name="dir_page" value="' . ($page + 1) . '">' . '</form>' . '<script type="text/javascript">setTimeout(function(){ document.getElementById("' . $fid . '").submit(); }, 500);</script>';
            } else {
                goaway($a->get_baseurl() . '/admin');
            }
        }
    }
}
示例#3
0
文件: sync.php 项目: rabuzarus/dir
/**
 * Worker body for a single fork during the pull jobs.
 *
 * Lowers its own process priority, opens its own database connection (stored
 * in the global $db), picks every $threadc-th entry of the batch starting at
 * index $i and runs a submit for each picked entry's 'url'.
 *
 * @param  int    $i          The index number of this worker (for round-robin).
 * @param  int    $threadc    The amount of workers (for round-robin). Values
 *                            below 1 are treated as 1 so the selection loop
 *                            always advances.
 * @param  array  $pull_batch A batch of entries to pull; each entry is read
 *                            through its 'url' key.
 * @param  string $db_host    DB host to connect to.
 * @param  string $db_user    DB user to connect with.
 * @param  string $db_pass    DB pass to connect with.
 * @param  mixed  $db_data    Passed through as the 4th argument of the dba constructor.
 * @param  mixed  $install    Passed through as the 5th argument of the dba constructor.
 * @return void
 */
function pull_worker($i, $threadc, $pull_batch, $db_host, $db_user, $db_pass, $db_data, $install)
{
    //Lets be nice, we're only doing maintenance here...
    pcntl_setpriority(5);

    //Get personal DBA's.
    global $db;
    $db = new dba($db_host, $db_user, $db_pass, $db_data, $install);

    //A stride of 0 would never leave the loop below, a negative one would walk backwards.
    $stride = max(1, intval($threadc));

    //Get our (round-robin) workload from the batch.
    $workload = array();
    for ($index = $i; isset($pull_batch[$index]); $index += $stride) {
        $workload[] = $pull_batch[$index];
    }

    //While we've got work to do; entries are taken from the end of the workload first.
    while (count($workload)) {
        $entry = array_pop($workload);
        //Reset the execution time limit for every single submit.
        set_time_limit(20);
        //This should work for 1 submit.
        msg("Submitting " . $entry['url']);
        run_submit($entry['url']);
    }
}
示例#4
0
    $myIndex = $i + 1;
    $workload = array();
    while (isset($res[$i])) {
        $entry = $res[$i];
        $workload[] = $entry;
        $ids[] = $entry['id'];
        $i += $threadc;
    }
    while (count($workload)) {
        $entry = array_pop($workload);
        set_time_limit(20);
        //This should work for 1 submit.
        if ($verbose) {
            echo "Submitting " . $entry['homepage'] . PHP_EOL;
        }
        run_submit($entry['homepage']);
    }
    exit;
} else {
    foreach ($threads as $pid) {
        pcntl_waitpid($pid, $status);
        if ($status !== 0) {
            if ($verbose) {
                echo "Bad process return value {$pid}:{$status}" . PHP_EOL;
            }
            logger("Bad process return value {$pid}:{$status}");
        }
    }
    $time = time() - $start_maintain;
    if ($verbose) {
        echo "Maintenance completed. Took {$time} seconds." . PHP_EOL;