Example #1
0
/**
 * Handle the "submit" task: read the crawler settings from the POSTed form,
 * build the configuration XML, crawl the site, and write the sitemap and
 * configuration files.
 *
 * Steps, in order:
 * - read all form values from POST
 * - generate the configuration XML from the values as submitted
 * - collect forbidden URL fragments (built-in list, user excludes, robots.txt)
 * - crawl via getlinks() and turn the result into sitemap XML via genSitemap()
 * - write both XML files, report the elapsed time, optionally update robots.txt
 * - show the notify form
 *
 * @param   string $option  the component name, passed through to writeXML()
 *                          and HTML_jcrawler::showNotifyForm()
 * @return  void
 */
function submit($option)
{
    global $stack, $mainframe;

    // NOTE(review): document_root and sitemap_url come straight from POST and
    // decide where files are written below. Presumably only trusted
    // administrators can reach this task - confirm.
    $website = JRequest::getVar('http_host', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    if (substr($website, -1) != "/") {
        $website .= "/";
    }
    $page_root = JRequest::getVar('document_root', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $sitemap_form = JRequest::getVar('sitemap_url', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $priority = JRequest::getVar('priority', '1.0', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $forbidden_types = toArray(JRequest::getVar('forbidden_types', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML));
    $exclude_names = toArray(JRequest::getVar('exclude_names', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML));
    $freq = JRequest::getVar('freq', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $modifyrobots = JRequest::getVar('robots', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $method = JRequest::getVar('method', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $level = JRequest::getVar('levels', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $maxcon = JRequest::getVar('maxcon', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $timeout = JRequest::getVar('timeout', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $whitelist = JRequest::getVar('whitelist', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);

    // Cap the priority at "1.0".
    if ($priority >= 1) {
        $priority = "1.0";
    }

    // The configuration is generated from the values exactly as submitted,
    // i.e. before any slash normalisation of $page_root / $sitemap_form.
    $xmlconfig = genConfig($priority, $forbidden_types, $exclude_names, $freq, $method, $level, $maxcon, $sitemap_form, $page_root, $timeout);

    if (substr($page_root, -1) != "/") {
        $page_root .= "/";
    }

    // $website and $page_root both end in "/" at this point, so drop any
    // leading slash from the sitemap path to get exactly one separator.
    $sitemap_path = ltrim($sitemap_form, "/");
    $sitemap_file = $page_root . $sitemap_path;
    $sitemap_url = $website . $sitemap_path;

    // Forbidden URL fragments: built-in list first, then the user's excludes.
    $forbidden_strings = array("print=1", "format=pdf", "option=com_mailto", "component/mailto", "/mailto/", "mailto:", "login", "register", "reset", "remind");
    foreach ($exclude_names as $name) {
        if ($name != "") {
            $forbidden_strings[] = $name;
        }
    }

    // robots.txt is optional, so a failed read is deliberately ignored.
    // Every "Disallow:" line is used, regardless of its User-agent group.
    // The pattern is anchored per line so commented-out rules are skipped and
    // a final rule without a trailing newline is still picked up.
    $robots = @JFile::read($page_root . 'robots.txt');
    if (is_string($robots) && preg_match_all('/^[ \t]*Disallow:(.*)$/mi', $robots, $pos)) {
        foreach ($pos[1] as $disallow) {
            $disallow = trim($disallow);
            // An empty Disallow means "nothing is disallowed"; turning it into
            // the bare site URL would forbid every page of the site.
            if ($disallow === "") {
                continue;
            }
            if (strpos($disallow, $website) === false) {
                // Avoid "http://host//path": $website already ends in "/".
                $disallow = $website . ltrim($disallow, "/");
            }
            $forbidden_strings[] = $disallow;
        }
    }

    // Reset the global $stack before the crawl starts.
    $stack = array();
    $s = microtime(true);
    if ($whitelist == "yes") {
        AntiFloodControl($website);
    }
    $links = getlinks($website, $forbidden_types, $level, $forbidden_strings, $method, $maxcon, $timeout);
    $file = genSitemap($priority, $links, $freq, $website);
    writeXML($file, $sitemap_file, $option, $sitemap_url);
    writeXML($xmlconfig, $page_root . "administrator/components/com_jcrawler/config.xml", $option, $sitemap_url);
    $mainframe->enqueueMessage("total time: " . round(microtime(true) - $s, 4) . " seconds");
    if ($modifyrobots == 1) {
        modifyrobots($sitemap_url, $page_root);
    }
    HTML_jcrawler::showNotifyForm($option, $sitemap_url);
}
Example #2
0
/**
 * Main wrapper function for submit task
 * - empty the #__jcrawler_urls table
 * - get parameters from the form
 * - generate the config XML from the values as submitted
 * - collect forbidden URL fragments (built-in list, user excludes, robots.txt)
 * - call main crawling function (getLinks) to get all the links at once
 * - complete it with priority information and generate the sitemap XML
 * - write sitemap and config files, report the elapsed time
 * - optionally update robots.txt, then show the notify form
 *
 * @param   string $option  the component name, passed through to writeXML()
 *                          and HTML_jcrawler::showNotifyForm()
 * @return  void
 */
function submit($option)
{
    // Objects are handles, so plain assignment is enough; "=&" on a function
    // result raises a notice when the factory does not return by reference.
    $db = JFactory::getDBO();
    $query = "TRUNCATE TABLE `#__jcrawler_urls`";
    $db->setQuery($query);
    $db->query();
    $app = JFactory::getApplication();

    // get parameters from gui of script
    // NOTE(review): document_root and sitemap_url come straight from POST and
    // decide where files are written below. Presumably only trusted
    // administrators can reach this task - confirm.
    if (!defined('HTTP_HOST')) {
        define('HTTP_HOST', JRequest::getVar('http_host', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML));
    }
    $website = HTTP_HOST;
    if (substr($website, -1) != "/") {
        $website .= "/";
    }
    $page_root = JRequest::getVar('document_root', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $sitemap_form = JRequest::getVar('sitemap_url', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $priority = JRequest::getVar('priority', '1.0', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $forbidden_types = toTrimmedArray(JRequest::getVar('forbidden_types', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML));
    $exclude_names = toTrimmedArray(JRequest::getVar('exclude_names', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML));
    $freq = JRequest::getVar('freq', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $modifyrobots = JRequest::getVar('robots', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $method = JRequest::getVar('method', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $level = JRequest::getVar('levels', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $maxcon = JRequest::getVar('maxcon', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $timeout = JRequest::getVar('timeout', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $whitelist = JRequest::getVar('whitelist', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);

    // Cap the priority at "1.0".
    if ($priority >= 1) {
        $priority = "1.0";
    }

    // The configuration is generated from the values exactly as submitted,
    // i.e. before any slash normalisation of $page_root / $sitemap_form.
    $xmlconfig = genConfig($priority, $forbidden_types, $exclude_names, $freq, $method, $level, $maxcon, $sitemap_form, $page_root, $timeout);

    if (substr($page_root, -1) != "/") {
        $page_root .= "/";
    }

    // $website and $page_root both end in "/" at this point, so drop any
    // leading slash from the sitemap path to get exactly one separator.
    $sitemap_path = ltrim($sitemap_form, "/");
    $sitemap_file = $page_root . $sitemap_path;
    $sitemap_url = $website . $sitemap_path;

    // Forbidden URL fragments: built-in list first, then the user's excludes.
    $forbidden_strings = array("print=1", "format=pdf", "option=com_mailto", "component/mailto", "/mailto/", "mailto:", "login", "register", "reset", "remind");
    foreach ($exclude_names as $name) {
        if ($name != "") {
            $forbidden_strings[] = $name;
        }
    }

    // robots.txt is optional, so a failed read is deliberately ignored.
    // Every "Disallow:" line is used, regardless of its User-agent group.
    // The pattern is anchored per line so commented-out rules are skipped and
    // a final rule without a trailing newline is still picked up.
    $robots = @JFile::read($page_root . 'robots.txt');
    if (is_string($robots) && preg_match_all('/^[ \t]*Disallow:(.*)$/mi', $robots, $pos)) {
        foreach ($pos[1] as $disallow) {
            $disallow = trim($disallow);
            // An empty Disallow means "nothing is disallowed"; turning it into
            // the bare site URL would forbid every page of the site.
            if ($disallow === "") {
                continue;
            }
            if (strpos($disallow, $website) === false) {
                // Avoid "http://host//path": $website already ends in "/".
                $disallow = $website . ltrim($disallow, "/");
            }
            $forbidden_strings[] = $disallow;
        }
    }

    $s = microtime(true);
    if ($whitelist == "yes") {
        AntiFloodControl($website);
    }
    $links = getLinks($website, $forbidden_types, $level, $forbidden_strings, $method, $maxcon, $timeout);
    $file = genSitemap($priority, $links, $freq, $website);
    writeXML($file, $sitemap_file, $option, $sitemap_url);
    writeXML($xmlconfig, $page_root . "administrator/components/com_jcrawler/config.xml", $option, $sitemap_url);
    $app->enqueueMessage("total time: " . round(microtime(true) - $s, 4) . " seconds");
    if ($modifyrobots == 1) {
        modifyrobots($sitemap_url, $page_root);
    }
    require_once JApplicationHelper::getPath('admin_html', 'com_jcrawler');
    HTML_jcrawler::showNotifyForm($option, $sitemap_url);
}