/**
 * Endpoint for submitting a single profile URL.
 *
 * The URL arrives hex-encoded in $_GET['url'] (see admin_post(), which
 * builds these links with bin2hex()) so it survives URL-encoding intact.
 *
 * @param object $a The global app object (passed by reference, framework convention).
 * @return void Never returns; always exits.
 */
function submit_content(&$a) {
	//Decode the URL. Guard against a missing parameter so we do not
	//trigger an undefined-index notice before decoding.
	$raw = isset($_GET['url']) ? $_GET['url'] : '';
	$url = hex2bin(notags(trim($raw)));

	//hex2bin() returns false on malformed hex (and '' on empty input);
	//in either case there is nothing meaningful to submit.
	if (!$url) {
		exit;
	}

	//Currently we simply push RAW URL's to our targets.
	sync_push($url);

	//Run the submit sequence.
	run_submit($url);
	exit;
}
/**
 * Handles POSTs to the admin page.
 *
 * Three modes, depending on which form was submitted:
 *  - submit_url:      redirect to /submit with the URL hex-encoded.
 *  - batch_submit:    process a couple of URLs from the .htimport work file,
 *                     self-reposting via an auto-submitting form until done.
 *  - dir_import_url:  page through a remote directory's /lsearch results and
 *                     fill the .htimport work file with unknown, healthy profiles.
 *
 * @param object $a The global app object (passed by reference, framework convention).
 * @return void
 */
function admin_post(&$a) {
	//Submit a profile URL.
	if (!empty($_POST['submit_url'])) {
		goaway($a->get_baseurl() . '/submit?url=' . bin2hex($_POST['submit_url']));
	}

	//Get our input. Use isset-guards so missing form fields don't raise notices.
	$url = isset($_POST['dir_import_url']) ? $_POST['dir_import_url'] : '';
	$page = isset($_POST['dir_page']) ? intval($_POST['dir_page']) : 0;
	$batch = !empty($_POST['batch_submit']);

	//Directory
	$file = realpath(__DIR__ . '/..') . '/.htimport';

	//Per batch setting.
	$perPage = 200;
	$perBatch = 2;

	if ($batch) {
		require_once 'include/submit.php';
		require_once 'include/site-health.php';

		//First get all data from file.
		$data = file_get_contents($file);
		$list = explode("\r\n", $data);

		//Fresh batch?
		if (!isset($_SESSION['import_progress'])) {
			$_SESSION['import_progress'] = true;
			$_SESSION['import_success'] = 0;
			$_SESSION['import_failed'] = 0;
			$_SESSION['import_total'] = 0;
			notice("Started new batch. ");
		}

		//Make sure we can use try catch for all sorts of errors.
		set_error_handler(function ($errno, $errstr = '', $errfile = '', $errline = '', $context = array()) {
			if ((error_reporting() & $errno) == 0) {
				return;
			}
			throw new \Exception($errstr, $errno);
		});

		for ($i = 0; $i < $perBatch; $i++) {
			if ($url = array_shift($list)) {
				set_time_limit(15);
				$_SESSION['import_total']++;
				//Assume failure up front; undone again when run_submit() succeeds.
				$_SESSION['import_failed']++;
				try {
					//A site may well turn 'sour' during the import.
					//Check the health again for this reason.
					$site = parse_site_from_url($url);
					$r = q(
						"SELECT * FROM `site-health` WHERE `base_url`= '%s' ORDER BY `id` ASC LIMIT 1",
						dbesc($site)
					);
					if (count($r) && intval($r[0]['health_score']) < $a->config['site-health']['skip_import_threshold']) {
						continue;
					}
					//Do the submit if health is ok.
					if (run_submit($url)) {
						$_SESSION['import_failed']--;
						$_SESSION['import_success']++;
					}
				} catch (\Exception $ex) {
					/* We tried... */
				}
			} else {
				break;
			}
		}

		//Don't leave our throwing error handler active for the rest of the request.
		restore_error_handler();

		$left = count($list);
		$success = $_SESSION['import_success'];
		$errors = $_SESSION['import_failed'];

		if ($left > 0) {
			notice("{$left} items left in batch...<br>{$success} updated profiles.<br>{$errors} import errors.");
			//Persist the remaining work and repost ourselves after a short delay.
			file_put_contents($file, implode("\r\n", $list));
			$fid = uniqid('autosubmit_');
			echo '<form method="POST" id="' . $fid . '"><input type="hidden" name="batch_submit" value="1"></form>' .
				'<script type="text/javascript">setTimeout(function(){ document.getElementById("' . $fid . '").submit(); }, 300);</script>';
		} else {
			notice("Completed batch! {$success} updated. {$errors} errors.");
			unlink($file);
			unset($_SESSION['import_progress']);
		}
		return;
	} elseif ($url) {
		require_once 'include/site-health.php';

		//Pull one page of profiles from the remote directory's search endpoint.
		$result = fetch_url($url . "/lsearch?p={$page}&n={$perPage}&search=.*");
		if ($result) {
			$data = json_decode($result);
		} else {
			$data = false;
		}

		if ($data) {
			$rows = '';
			foreach ($data->results as $profile) {
				//Skip known profiles.
				$purl = $profile->url;
				$nurl = str_replace(array('https:', '//www.'), array('http:', '//'), $purl);
				$r = q(
					"SELECT count(*) as `matched` FROM `profile` WHERE (`homepage` = '%s' OR `nurl` = '%s') LIMIT 1",
					dbesc($purl),
					dbesc($nurl)
				);
				if (count($r) && $r[0]['matched']) {
					continue;
				} else {
					//Unknown profile: skip it anyway when its site is too unhealthy.
					$site = parse_site_from_url($purl);
					$r = q(
						"SELECT * FROM `site-health` WHERE `base_url`= '%s' ORDER BY `id` ASC LIMIT 1",
						dbesc($site)
					);
					if (count($r) && intval($r[0]['health_score']) < $a->config['site-health']['skip_import_threshold']) {
						continue;
					}
				}
				$rows .= $profile->url . "\r\n";
			}

			//Append on follow-up pages, truncate on the first one.
			file_put_contents($file, $rows, $page > 0 ? FILE_APPEND : 0);
			$progress = min(($page + 1) * $perPage, $data->total);
			notice("Imported " . $progress . "/" . $data->total . " URLs.");

			if ($progress !== $data->total) {
				//More pages to go: repost ourselves for the next page.
				//Escape the user-supplied URL before echoing it into the attribute (XSS).
				$fid = uniqid('autosubmit_');
				echo '<form method="POST" id="' . $fid . '">' .
					'<input type="hidden" name="dir_import_url" value="' . htmlspecialchars($url, ENT_QUOTES) . '">' .
					'<input type="hidden" name="dir_page" value="' . ($page + 1) . '">' .
					'</form>' .
					'<script type="text/javascript">setTimeout(function(){ document.getElementById("' . $fid . '").submit(); }, 500);</script>';
			} else {
				goaway($a->get_baseurl() . '/admin');
			}
		}
	}
}
/**
 * Body of a single forked worker during the pull jobs.
 *
 * Runs at a lowered scheduling priority, opens its own database
 * connection, takes every $threadc-th entry of the batch (starting at
 * offset $i, round-robin) and submits each URL in turn.
 *
 * @param int $i The index number of this worker (for round-robin).
 * @param int $threadc The amount of workers (for round-robin).
 * @param array $pull_batch A batch of URL's to pull.
 * @param string $db_host DB host to connect to.
 * @param string $db_user DB user to connect with.
 * @param string $db_pass DB pass to connect with.
 * @param mixed $db_data Nobody knows.
 * @param mixed $install Maybe a boolean.
 * @return void
 */
function pull_worker($i, $threadc, $pull_batch, $db_host, $db_user, $db_pass, $db_data, $install) {
	//Lets be nice, we're only doing maintenance here...
	pcntl_setpriority(5);

	//A forked child needs its own database connection.
	global $db;
	$db = new dba($db_host, $db_user, $db_pass, $db_data, $install);

	//Pick our round-robin share of the batch: every $threadc-th entry, starting at $i.
	$jobs = array();
	for ($slot = $i; isset($pull_batch[$slot]); $slot += $threadc) {
		$jobs[] = $pull_batch[$slot];
	}

	//Drain the workload, most recently collected entries first.
	while (count($jobs)) {
		$job = array_pop($jobs);

		//This should work for 1 submit.
		set_time_limit(20);

		msg("Submitting " . $job['url']);
		run_submit($job['url']);
	}
}
//NOTE(review): fragment of a larger function — both its opening and its closing lie
//outside this chunk, so the braces below are intentionally unbalanced here.
//Looks like the child/parent split after a pcntl fork (the else-branch waits on PIDs) — TODO confirm.
$myIndex = $i + 1;

//Collect this worker's round-robin share of $res: every $threadc-th row starting at $i,
//remembering each row's id in $ids as we go.
$workload = array();
while (isset($res[$i])) {
	$entry = $res[$i];
	$workload[] = $entry;
	$ids[] = $entry['id'];
	$i += $threadc;
}

//Submit each collected profile homepage; array_pop() processes them in
//reverse collection order.
while (count($workload)) {
	$entry = array_pop($workload);
	set_time_limit(20); //This should work for 1 submit.
	if ($verbose) {
		echo "Submitting " . $entry['homepage'] . PHP_EOL;
	}
	run_submit($entry['homepage']);
}
//Presumably a forked child: exit so it never falls through into the parent path below.
exit;
} else {
	//Parent path: reap every child and log any non-zero exit status.
	foreach ($threads as $pid) {
		pcntl_waitpid($pid, $status);
		if ($status !== 0) {
			if ($verbose) {
				echo "Bad process return value {$pid}:{$status}" . PHP_EOL;
			}
			logger("Bad process return value {$pid}:{$status}");
		}
	}
	//Report how long the whole maintenance run took ($start_maintain set outside this view).
	$time = time() - $start_maintain;
	if ($verbose) {
		echo "Maintenance completed. Took {$time} seconds." . PHP_EOL;