Beispiel #1
0
/**
 * Runs one request through the processing helpers and then the database helpers.
 *
 * Reads the "address" query parameter, hands it to get_vars_from_input(), and
 * feeds the result through the analyst_* / list_links_as_important() helpers to
 * build the $article array. The article is then passed to the db_* helpers on a
 * connection obtained from db_connect(), which is closed before returning.
 *
 * NOTE(review): the helpers are defined outside this view; their exact
 * semantics and failure modes should be confirmed against their definitions.
 *
 * @return void
 */
function init()
{
    // processing calls
    // Guard the superglobal read: a request without "address" still passes null
    // to the helper, but no longer raises an undefined-index notice/warning.
    $address = isset($_GET['address']) ? $_GET['address'] : null;
    $params = get_vars_from_input($address);
    $article = analyst_visit($params);
    $article['body'] = analyst_clean_text($article['body']);
    $article['keywords'] = list_links_as_important($article['raw']);

    // database calls
    $db = db_connect();
    $article['links'] = db_check_for_linkages($db, format_list_for_db_search($article['keywords']));
    // The title is replaced by whatever the logging helper returns.
    $article['title'] = db_log_title_into_db($db, $article['title']);
    db_log_linkages($db, $article);

    // double check your work
    db_clean_linkages($db);
    $db->close();
}
Beispiel #2
0
<?php

include 'logger.php';
// The "s" query parameter doubles as the debug-mode flag handed to
// get_vars_from_input(). Interface-mode requests normally arrive without it,
// so the read is guarded: a missing parameter yields null instead of raising
// an undefined-index notice/warning that could leak into the echoed response.
$debug_mode = isset($_GET['s']) ? $_GET['s'] : null;
$search_params = get_vars_from_input($debug_mode);
$target = crawler_make_validated_link_from($search_params);
$sublinks = crawler_list_related_links_from($search_params, $target);

// Only interface mode produces output: the target, a ";" separator, then the
// related links joined by commas.
if ($search_params['mode'] === 'interface') {
    echo $target . ';';
    echo implode(',', $sublinks);
}
/* *************************************************************** *\
	get_vars_from_input(debug_mode)

	Builds the parameter array for a request.

	debug_mode truthy:  's' is taken from $_GET['s'] and the remaining
	                    fields get fixed defaults (max 10, depth 1,
	                    debug 1, type 'wikipedia'); mode is 'debug'.
	debug_mode falsy:   's', 'max', 'depth', 'debug' and 'type' are
	                    taken from $_POST; mode is 'interface'.

	A field missing from the request is returned as null; no
	undefined-index notice/warning is raised for it.

	Returns an array with the keys s, max, depth, debug, type, mode
	(in that order). Only the modes 'debug' and 'interface' are
	produced here.
\* *************************************************************** */
function get_vars_from_input($debug_mode)
{
    if ($debug_mode) {
        return array(
            's' => isset($_GET['s']) ? $_GET['s'] : null,
            'max' => 10,
            'depth' => 1,
            'debug' => 1,
            'type' => 'wikipedia',
            'mode' => 'debug',
        );
    }

    // Interface mode: copy each expected field from the POST body, falling
    // back to null when the client did not send it.
    $params = array();
    foreach (array('s', 'max', 'depth', 'debug', 'type') as $field) {
        $params[$field] = isset($_POST[$field]) ? $_POST[$field] : null;
    }
    $params['mode'] = 'interface';

    return $params;
}
/* *************************************************************** *\
	crawler_list_related_links_from(url)
		crawler_cut_body_from_data(raw text)