/**
 * Renders the public health summary: a ranked list of open-registration sites.
 *
 * Steps:
 *  1. Count profiles per site by parsing every profile's `homepage`.
 *  2. Load all `site-health` rows whose `reg_policy` is 'REGISTER_OPEN' and
 *     give each a ranking weight of (users / 100 + 10) * health_score.
 *  3. List, in descending weight order, every site that has a health score
 *     above 20 and at least 10 known users.
 *  4. Fill `view/health_summary.tpl` through replace_macros().
 *
 * @param mixed $a Passed by reference by the caller; not used by this function.
 * @return mixed The result of replace_macros() for the filled template.
 */
function health_summary(&$a)
{
    //Find the user count per site.
    $sites = array();
    $r = q("SELECT `homepage` FROM `profile` WHERE 1");
    if (is_array($r)) {
        foreach ($r as $rr) {
            $site = parse_site_from_url($rr['homepage']);
            if ($site) {
                if (!isset($sites[$site])) {
                    $sites[$site] = 0;
                }
                $sites[$site]++;
            }
        }
    }

    //See if we have a health for them.
    $sites_with_health = array();
    $site_healths = array();
    $r = q("SELECT * FROM `site-health` WHERE `reg_policy`='REGISTER_OPEN'");
    if (is_array($r)) {
        foreach ($r as $rr) {
            $base_url = $rr['base_url'];
            //A site may have a health record without any known profile.
            $users = isset($sites[$base_url]) ? $sites[$base_url] : 0;
            $sites_with_health[$base_url] = ($users / 100 + 10) * intval($rr['health_score']);
            $site_healths[$base_url] = $rr;
        }
    }

    //Highest weight first; keys (base URLs) are preserved.
    arsort($sites_with_health);

    $total = 0;
    $public_sites = '';
    foreach ($sites_with_health as $k => $v) {
        //Skip unhealthy sites. The list is ordered by weight, not by health,
        //so an unhealthy site with many users can precede healthy ones:
        //we must keep iterating instead of stopping here.
        $site = $site_healths[$k];
        if ($site['health_score'] <= 20) {
            continue;
        }

        //Skip small sites.
        $users = isset($sites[$k]) ? $sites[$k] : 0;
        if ($users < 10) {
            continue;
        }

        //The base URL originates from user-submitted profile URLs: escape it.
        $public_sites .=
            '<span class="health ' . health_score_to_name($site['health_score']) . '">♥</span> ' .
            '<a href="/health/' . $site['id'] . '">' . htmlspecialchars($k, ENT_QUOTES, 'UTF-8') . '</a> ' .
            '(' . $users . ')' .
            "<br />\r\n";
        $total++;
    }
    $public_sites .= "<br>Total: {$total}<br />\r\n";

    //No version overview is computed here; the placeholder is still replaced
    //so it does not show up verbatim in the output.
    $versions = '';

    $tpl = file_get_contents('view/health_summary.tpl');
    return replace_macros($tpl, array('$versions' => $versions, '$public_sites' => $public_sites));
}
/**
 * Tells whether the site hosting the given URL should be skipped on import.
 *
 * Looks up the oldest `site-health` row for the site parsed from $url and
 * compares its health score with the configured `skip_import_threshold`.
 * Sites without a health record are never skipped.
 *
 * Requires `include/site-health.php` to be loaded by the caller.
 *
 * @param object $a   Application object; `config['site-health']['skip_import_threshold']` is read.
 * @param string $url Profile URL to check.
 * @return bool True when a health record exists and its score is below the threshold.
 */
function admin_import_site_is_unhealthy($a, $url)
{
    $site = parse_site_from_url($url);
    //The URL comes from a remote directory or the import file: escape it.
    $r = q("SELECT * FROM `site-health` WHERE `base_url`= '%s' ORDER BY `id` ASC LIMIT 1", dbesc($site));

    return is_array($r)
        && count($r)
        && intval($r[0]['health_score']) < $a->config['site-health']['skip_import_threshold'];
}

/**
 * Handles the POST actions of the admin page.
 *
 * Three mutually exclusive actions, selected by POST fields:
 *  - `submit_url`: hands off to goaway() with the hex-encoded URL for /submit.
 *  - `batch_submit`: takes up to $perBatch URLs from the `.htimport` file,
 *    submits each with run_submit(), keeps counters in $_SESSION and echoes an
 *    auto-submitting form until the file is empty.
 *  - `dir_import_url` (+ `dir_page`): fetches one page of profiles from a
 *    remote directory's /lsearch endpoint, appends the unknown and
 *    sufficiently healthy ones to `.htimport`, and echoes an auto-submitting
 *    form for the next page.
 *
 * @param object $a Application object; get_baseurl() and config are used.
 * @return void
 */
function admin_post(&$a)
{
    //Submit a profile URL.
    if (!empty($_POST['submit_url'])) {
        goaway($a->get_baseurl() . '/submit?url=' . bin2hex($_POST['submit_url']));
    }

    //Get our input.
    $url = isset($_POST['dir_import_url']) ? $_POST['dir_import_url'] : '';
    $page = isset($_POST['dir_page']) ? intval($_POST['dir_page']) : 0;
    $batch = !empty($_POST['batch_submit']);

    //Directory
    $file = realpath(__DIR__ . '/..') . '/.htimport';

    //Per batch setting.
    $perPage = 200;
    $perBatch = 2;

    if ($batch) {
        require_once 'include/submit.php';
        require_once 'include/site-health.php';

        //First get all data from file. A missing file is an empty batch.
        $data = is_readable($file) ? file_get_contents($file) : false;
        $list = ($data === false || $data === '') ? array() : explode("\r\n", $data);

        //Fresh batch?
        if (!isset($_SESSION['import_progress'])) {
            $_SESSION['import_progress'] = true;
            $_SESSION['import_success'] = 0;
            $_SESSION['import_failed'] = 0;
            $_SESSION['import_skipped'] = 0;
            $_SESSION['import_total'] = 0;
            notice("Started new batch. ");
        }
        //A batch that was already running may not have this counter yet.
        if (!isset($_SESSION['import_skipped'])) {
            $_SESSION['import_skipped'] = 0;
        }

        //Make sure we can use try catch for all sorts of errors.
        set_error_handler(function ($errno, $errstr = '', $errfile = '', $errline = '', $context = array()) {
            if ((error_reporting() & $errno) == 0) {
                return;
            }
            throw new \Exception($errstr, $errno);
        });

        for ($i = 0; $i < $perBatch; $i++) {
            $url = array_shift($list);
            if (!$url) {
                break;
            }

            set_time_limit(15);
            $_SESSION['import_total']++;
            //Assume failure until the submit has proven otherwise.
            $_SESSION['import_failed']++;

            try {
                //A site may well turn 'sour' during the import.
                //Check the health again for this reason.
                if (admin_import_site_is_unhealthy($a, $url)) {
                    //Skipping is a decision, not an import error.
                    $_SESSION['import_failed']--;
                    $_SESSION['import_skipped']++;
                    continue;
                }

                //Do the submit if health is ok.
                if (run_submit($url)) {
                    $_SESSION['import_failed']--;
                    $_SESSION['import_success']++;
                }
            } catch (\Exception $ex) {
                /* We tried... the URL stays counted as failed. */
            }
        }

        //The throwing handler is only meant for the import loop; do not let
        //it leak into the file handling below or the rest of the request.
        restore_error_handler();

        $left = count($list);
        $success = $_SESSION['import_success'];
        $errors = $_SESSION['import_failed'];

        if ($left > 0) {
            notice("{$left} items left in batch...<br>{$success} updated profiles.<br>{$errors} import errors.");
            file_put_contents($file, implode("\r\n", $list));
            $fid = uniqid('autosubmit_');
            echo '<form method="POST" id="' . $fid . '"><input type="hidden" name="batch_submit" value="1"></form>' .
                '<script type="text/javascript">setTimeout(function(){ document.getElementById("' . $fid . '").submit(); }, 300);</script>';
        } else {
            notice("Completed batch! {$success} updated. {$errors} errors.");
            if (file_exists($file)) {
                unlink($file);
            }
            unset($_SESSION['import_progress']);
        }
        return;
    } elseif ($url) {
        require_once 'include/site-health.php';

        $result = fetch_url($url . "/lsearch?p={$page}&n={$perPage}&search=.*");
        $data = $result ? json_decode($result) : false;

        if (is_object($data)) {
            //Tolerate incomplete responses from the remote directory.
            $results = (isset($data->results) && is_array($data->results)) ? $data->results : array();
            $total = isset($data->total) ? intval($data->total) : 0;

            $rows = '';
            foreach ($results as $profile) {
                //Skip known profiles.
                $purl = $profile->url;
                $nurl = str_replace(array('https:', '//www.'), array('http:', '//'), $purl);
                $r = q(
                    "SELECT count(*) as `matched` FROM `profile` WHERE (`homepage` = '%s' OR `nurl` = '%s') LIMIT 1",
                    dbesc($purl),
                    dbesc($nurl)
                );
                if (is_array($r) && count($r) && $r[0]['matched']) {
                    continue;
                }

                //Skip profiles on sites below the health threshold.
                if (admin_import_site_is_unhealthy($a, $purl)) {
                    continue;
                }

                $rows .= $purl . "\r\n";
            }

            //The first page starts a new import file, later pages append.
            file_put_contents($file, $rows, $page > 0 ? FILE_APPEND : 0);

            $progress = min(($page + 1) * $perPage, $total);
            notice("Imported " . $progress . "/" . $total . " URLs.");

            if ($progress < $total) {
                $fid = uniqid('autosubmit_');
                //The URL is posted user input: escape it for the attribute.
                echo '<form method="POST" id="' . $fid . '">' .
                    '<input type="hidden" name="dir_import_url" value="' . htmlspecialchars($url, ENT_QUOTES, 'UTF-8') . '">' .
                    '<input type="hidden" name="dir_page" value="' . ($page + 1) . '">' .
                    '</form>' .
                    '<script type="text/javascript">setTimeout(function(){ document.getElementById("' . $fid . '").submit(); }, 500);</script>';
            } else {
                goaway($a->get_baseurl() . '/admin');
            }
        }
    }
}
/**
 * Performs one timed cURL request with the given options.
 *
 * @param array $options cURL options as accepted by curl_setopt_array().
 * @return array Keys: 'data' (curl_exec() result), 'errno' (curl_errno()
 *               result) and 'time' (request duration in whole milliseconds).
 */
function run_site_probe_request($options)
{
    $handle = curl_init();
    curl_setopt_array($handle, $options);

    $started = microtime(true);
    $data = curl_exec($handle);
    $finished = microtime(true);

    $errno = curl_errno($handle);
    curl_close($handle);

    return array(
        'data' => $data,
        'errno' => $errno,
        'time' => round(($finished - $started) * 1000),
    );
}

/**
 * Probes a known site and stores the outcome in the `site-health` table.
 *
 * Requests `<base_url>/friendica/json` for the `site-health` row with the
 * given ID. When the request fails with cURL error 60 or 83 it is repeated
 * without peer verification and the site is flagged as having certificate
 * issues. A successfully decoded response adds a `site-probe` timing row and
 * updates the site's descriptive columns; in every case the health score and
 * `dt_last_probed` are updated.
 *
 * @param int   $id        ID of the `site-health` row to probe.
 * @param array $entry_out Receives the refreshed `site-health` row; left
 *                         untouched when the row cannot be re-read.
 * @return void
 * @throws \Exception When no `site-health` row exists for $id.
 */
function run_site_probe($id, &$entry_out)
{
    global $a;

    //Get the site information from the DB, based on the ID.
    $result = q("SELECT * FROM `site-health` WHERE `id`= %u ORDER BY `id` ASC LIMIT 1", intval($id));

    //Abort the probe if site is not known.
    if (!$result || !isset($result[0])) {
        logger('Unknown site-health ID being probed: ' . $id);
        throw new \Exception('Unknown site-health ID being probed: ' . $id);
    }

    //Shortcut.
    $entry = $result[0];
    $base_url = $entry['base_url'];
    $probe_location = $base_url . '/friendica/json';

    //Prepare the CURL call.
    $options = array(
        CURLOPT_TIMEOUT => max($a->config['site-health']['probe_timeout'], 1),
        CURLOPT_CONNECTTIMEOUT => 1,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_MAXREDIRS => 8,
        CURLOPT_SSL_VERIFYPEER => true,
        CURLOPT_SSL_VERIFYHOST => 2,
        CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
        CURLOPT_USERAGENT => 'friendica-directory-probe-0.1',
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_URL => $probe_location,
    );

    //Probe the site.
    $probe = run_site_probe_request($options);

    //Check for SSL problems (cURL errors 60 and 83).
    $sslcert_issues = in_array($probe['errno'], array(60, 83), true);

    //When it's the certificate that doesn't work.
    if ($sslcert_issues) {
        //Probe again, without strict SSL.
        $options[CURLOPT_SSL_VERIFYPEER] = false;
        $probe = run_site_probe_request($options);
    }

    $probe_data = $probe['data'];
    $time = $probe['time'];

    #TODO: if the site redirects elsewhere, notice this site and record an issue.

    //json_decode() yields null on failure. Only a JSON object is a usable
    //answer; arrays and scalars would break the property reads below.
    $data = is_string($probe_data) ? json_decode($probe_data) : null;
    $parse_failed = !is_object($data);

    //Reads a scalar property as string; '' when missing or of another type.
    $field = function ($object, $name) {
        if (is_object($object) && isset($object->$name) && is_scalar($object->$name)) {
            return (string) $object->$name;
        }
        return '';
    };

    $parsedDataQuery = '';
    if (!$parse_failed) {
        //Record the probe speed in a probes table.
        q("INSERT INTO `site-probe` (`site_health_id`, `dt_performed`, `request_time`)" .
            "VALUES (%u, NOW(), %u)", $entry['id'], $time);

        $plugins = (isset($data->plugins) && is_array($data->plugins)) ? implode("\r\n", $data->plugins) : '';
        $admin = isset($data->admin) ? $data->admin : null;

        //Update any health calculations or otherwise processed data.
        $parsedDataQuery = sprintf(
            "`dt_last_seen` = NOW(),\n `name` = '%s',\n `version` = '%s',\n `plugins` = '%s',\n `reg_policy` = '%s',\n `info` = '%s',\n `admin_name` = '%s',\n `admin_profile` = '%s',\n ",
            dbesc($field($data, 'site_name')),
            dbesc($field($data, 'version')),
            dbesc($plugins),
            dbesc($field($data, 'register_policy')),
            dbesc($field($data, 'info')),
            dbesc($field($admin, 'name')),
            dbesc($field($admin, 'profile'))
        );

        //Did we use HTTPS?
        $urlMeta = parse_url($probe_location);
        if (isset($urlMeta['scheme']) && $urlMeta['scheme'] == 'https') {
            $parsedDataQuery .= sprintf("`ssl_state` = b'%u',", $sslcert_issues ? '0' : '1');
        } else {
            $parsedDataQuery .= "`ssl_state` = NULL,";
        }

        //Do we have a no scrape supporting node? :D
        if (isset($data->no_scrape_url) && is_scalar($data->no_scrape_url)) {
            $parsedDataQuery .= sprintf("`no_scrape_url` = '%s',", dbesc((string) $data->no_scrape_url));
        }
    }

    //Get the new health.
    $version = $parse_failed ? '' : $field($data, 'version');
    $health = health_score_after_probe($entry['health_score'], !$parse_failed, $time, $version, $sslcert_issues);

    //The fragment holds text supplied by the remote site and becomes part of
    //the printf-style format string given to q(): literal percent signs must
    //be doubled or they would be treated as placeholders.
    $setFragment = str_replace('%', '%%', $parsedDataQuery);

    //Update the health.
    q("UPDATE `site-health` SET\n `health_score` = '%d',\n {$setFragment}\n `dt_last_probed` = NOW()\n WHERE `id` = %d LIMIT 1", $health, $entry['id']);

    //Get the site information from the DB, based on the ID.
    $result = q("SELECT * FROM `site-health` WHERE `id`= %u ORDER BY `id` ASC LIMIT 1", $entry['id']);

    //Return updated entry data.
    if ($result && isset($result[0])) {
        $entry_out = $result[0];
    }
}