/**
 * Handle the submit task: read the crawler settings from the POSTed form,
 * crawl the site, write the sitemap and the component config, then show the
 * notification form.
 *
 * Steps, in order:
 *  - read the form parameters through JRequest
 *  - build the config XML (genConfig)
 *  - merge the Disallow rules of robots.txt into the exclusion list
 *  - crawl (getlinks) and build the sitemap XML (genSitemap)
 *  - write both files (writeXML) and enqueue the elapsed time as a message
 *  - optionally call modifyrobots() when the "robots" field equals 1
 *
 * Resets the global $stack to an empty array before crawling.
 *
 * @param string $option the component name, passed on to writeXML() and the notify form
 * @return void
 */
function submit($option)
{
    global $stack, $mainframe;

    // get values from gui of script
    $website = JRequest::getVar('http_host', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    if (substr($website, -1) != "/") {
        $website = $website . "/";
    }
    $page_root = JRequest::getVar('document_root', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);

    // The sitemap path is read once and reused for the file path, the public
    // URL and the value stored in the config.
    $sitemap_form = JRequest::getVar('sitemap_url', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    // Built before $page_root gets its trailing slash, exactly as submitted.
    $sitemap_file = $page_root . $sitemap_form;
    $sitemap_url = $website . $sitemap_form;

    $priority = JRequest::getVar('priority', '1.0', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $forbidden_types = toArray(JRequest::getVar('forbidden_types', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML));
    $exclude_names = toArray(JRequest::getVar('exclude_names', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML));
    $freq = JRequest::getVar('freq', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $modifyrobots = JRequest::getVar('robots', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $method = JRequest::getVar('method', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $level = JRequest::getVar('levels', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $maxcon = JRequest::getVar('maxcon', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $timeout = JRequest::getVar('timeout', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $whitelist = JRequest::getVar('whitelist', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);

    // Cap the priority at 1.0.
    if ($priority >= 1) {
        $priority = "1.0";
    }

    // The config is generated from the values as submitted, i.e. before the
    // robots.txt rules are merged in and before $page_root is normalised.
    $xmlconfig = genConfig($priority, $forbidden_types, $exclude_names, $freq, $method, $level, $maxcon, $sitemap_form, $page_root, $timeout);

    if (substr($page_root, -1) != "/") {
        $page_root = $page_root . "/";
    }

    // Best effort: a missing or unreadable robots.txt simply adds no exclusions.
    $robots = @JFile::read($page_root . 'robots.txt');
    $disallowed = array();
    if (is_string($robots)) {
        // Capture up to the end of the line so that a last rule without a
        // terminating newline is still picked up.
        preg_match_all('/Disallow:([^\r\n]*)/', $robots, $pos);
        $disallowed = $pos[1];
    }

    if (isset($exclude_names[0]) && $exclude_names[0] == "") {
        unset($exclude_names[0]);
    }

    foreach ($disallowed as $disallow) {
        $disallow = trim($disallow);
        if ($disallow === '') {
            // An empty Disallow value means "nothing is disallowed"; turning
            // it into the bare site root would exclude the whole site.
            continue;
        }
        if (strpos($disallow, $website) === false) {
            // $website always ends with "/", so drop the rule's leading
            // slash to avoid building "host//path".
            $disallow = $website . ltrim($disallow, '/');
        }
        $exclude_names[] = $disallow;
    }

    $forbidden_strings = array("print=1", "format=pdf", "option=com_mailto", "component/mailto", "/mailto/", "mailto:", "login", "register", "reset", "remind");
    foreach ($exclude_names as $name) {
        if ($name != "") {
            $forbidden_strings[] = $name;
        }
    }

    $stack = array();
    $s = microtime(true);

    if ($whitelist == "yes") {
        AntiFloodControl($website);
    }

    $links = getlinks($website, $forbidden_types, $level, $forbidden_strings, $method, $maxcon, $timeout);
    $file = genSitemap($priority, $links, $freq, $website);

    writeXML($file, $sitemap_file, $option, $sitemap_url);
    // $page_root already ends with "/" here, so no leading slash is added.
    writeXML($xmlconfig, $page_root . "administrator/components/com_jcrawler/config.xml", $option, $sitemap_url);

    $mainframe->enqueueMessage("total time: " . round(microtime(true) - $s, 4) . " seconds");

    if ($modifyrobots == 1) {
        modifyrobots($sitemap_url, $page_root);
    }

    HTML_jcrawler::showNotifyForm($option, $sitemap_url);
}
/**
 * Run the generation steps in a fixed order: genRobot(), genSitemap(),
 * genRss(), then clear_cache().
 *
 * All four functions are defined outside this block and are called without
 * arguments; their return values are ignored.
 * NOTE(review): the name suggests this regenerates the site's SEO files
 * (robots file, sitemap, RSS feed) - confirm against the callees.
 *
 * @return void
 */
function genereFileReferencement()
{
    genRobot();
    genSitemap();
    genRss();

    // Runs last, after the three generators.
    clear_cache();
}
/**
 * Main wrapper function for submit task
 * - empty the #__jcrawler_urls table
 * - get parameters from the form
 * - build the config XML
 * - merge the Disallow rules of robots.txt into the exclusion list
 * - call main crawling function (getLinks) to get all the links at once
 * - complete it with priority information
 * - generate sitemap XML file and write the config file
 * - optionally call modifyrobots() when the "robots" field equals 1
 *
 * Defines the HTTP_HOST constant from the posted "http_host" value when it
 * is not defined yet.
 *
 * @param string $option the component name
 * @return void
 */
function submit($option)
{
    // Objects are handles on PHP 5+, so no reference assignment is needed
    // (assigning a function result by reference raises a strict notice).
    $db = JFactory::getDBO();
    $query = "TRUNCATE TABLE `#__jcrawler_urls`";
    $db->setQuery($query);
    $db->query();

    $app = JFactory::getApplication();

    // get parameters from gui of script
    if (!defined('HTTP_HOST')) {
        define('HTTP_HOST', JRequest::getVar('http_host', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML));
    }
    $website = HTTP_HOST;
    if (substr($website, -1) != "/") {
        $website = $website . "/";
    }
    $page_root = JRequest::getVar('document_root', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);

    // The sitemap path is read once and reused for the file path, the public
    // URL and the value stored in the config.
    $sitemap_form = JRequest::getVar('sitemap_url', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    // Built before $page_root gets its trailing slash, exactly as submitted.
    $sitemap_file = $page_root . $sitemap_form;
    $sitemap_url = $website . $sitemap_form;

    $priority = JRequest::getVar('priority', '1.0', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $forbidden_types = toTrimmedArray(JRequest::getVar('forbidden_types', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML));
    $exclude_names = toTrimmedArray(JRequest::getVar('exclude_names', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML));
    $freq = JRequest::getVar('freq', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $modifyrobots = JRequest::getVar('robots', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $method = JRequest::getVar('method', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $level = JRequest::getVar('levels', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $maxcon = JRequest::getVar('maxcon', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $timeout = JRequest::getVar('timeout', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $whitelist = JRequest::getVar('whitelist', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);

    // Cap the priority at 1.0.
    if ($priority >= 1) {
        $priority = "1.0";
    }

    // The config is generated from the values as submitted, i.e. before the
    // robots.txt rules are merged in and before $page_root is normalised.
    $xmlconfig = genConfig($priority, $forbidden_types, $exclude_names, $freq, $method, $level, $maxcon, $sitemap_form, $page_root, $timeout);

    if (substr($page_root, -1) != "/") {
        $page_root = $page_root . "/";
    }

    // Best effort: a missing or unreadable robots.txt simply adds no exclusions.
    $robots = @JFile::read($page_root . 'robots.txt');
    $disallowed = array();
    if (is_string($robots)) {
        // Capture up to the end of the line so that a last rule without a
        // terminating newline is still picked up.
        preg_match_all('/Disallow:([^\r\n]*)/', $robots, $pos);
        $disallowed = $pos[1];
    }

    if (isset($exclude_names[0]) && $exclude_names[0] == "") {
        unset($exclude_names[0]);
    }

    foreach ($disallowed as $disallow) {
        $disallow = trim($disallow);
        if ($disallow === '') {
            // An empty Disallow value means "nothing is disallowed"; turning
            // it into the bare site root would exclude the whole site.
            continue;
        }
        if (strpos($disallow, $website) === false) {
            // $website always ends with "/", so drop the rule's leading
            // slash to avoid building "host//path".
            $disallow = $website . ltrim($disallow, '/');
        }
        $exclude_names[] = $disallow;
    }

    $forbidden_strings = array("print=1", "format=pdf", "option=com_mailto", "component/mailto", "/mailto/", "mailto:", "login", "register", "reset", "remind");
    foreach ($exclude_names as $name) {
        if ($name != "") {
            $forbidden_strings[] = $name;
        }
    }

    $s = microtime(true);

    if ($whitelist == "yes") {
        AntiFloodControl($website);
    }

    $links = getLinks($website, $forbidden_types, $level, $forbidden_strings, $method, $maxcon, $timeout);
    $file = genSitemap($priority, $links, $freq, $website);

    writeXML($file, $sitemap_file, $option, $sitemap_url);
    // $page_root already ends with "/" here, so no leading slash is added.
    writeXML($xmlconfig, $page_root . "administrator/components/com_jcrawler/config.xml", $option, $sitemap_url);

    $app->enqueueMessage("total time: " . round(microtime(true) - $s, 4) . " seconds");

    if ($modifyrobots == 1) {
        modifyrobots($sitemap_url, $page_root);
    }

    // The admin HTML view class is loaded right before it is used.
    require_once JApplicationHelper::getPath('admin_html', 'com_jcrawler');
    HTML_jcrawler::showNotifyForm($option, $sitemap_url);
}