/**
 * Handle the "submit" task: crawl the site and write the sitemap.
 *
 * Steps, in order:
 * - read the crawler settings from the POSTed form
 * - build the config XML with genConfig()
 * - merge the Disallow rules found in robots.txt into the exclusion list
 * - crawl with getlinks() and build the sitemap with genSitemap()
 * - write the sitemap and config.xml with writeXML()
 * - optionally call modifyrobots(), then show the notify form
 *
 * NOTE(review): document_root and sitemap_url are taken from the request and
 * decide where files get written - presumably this task is reachable by
 * trusted administrators only; confirm against the caller.
 *
 * @param string $option the component name
 * @return void
 */
function submit($option)
{
    global $stack, $mainframe;

    // get values from gui of script
    $website = JRequest::getVar('http_host', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    if (substr($website, -1) != "/") {
        $website = $website . "/";
    }
    $page_root = JRequest::getVar('document_root', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);

    // Read the sitemap location once and join it with exactly one slash, so
    // neither "root" . "sitemap.xml" nor "http://site/" . "/sitemap.xml"
    // can produce a glued or double-slashed result.
    $sitemap_form = JRequest::getVar('sitemap_url', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $sitemap_rel = ltrim($sitemap_form, "/");
    $sitemap_file = rtrim($page_root, "/") . "/" . $sitemap_rel;
    $sitemap_url = $website . $sitemap_rel;

    $priority = JRequest::getVar('priority', '1.0', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $forbidden_types = toArray(JRequest::getVar('forbidden_types', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML));
    $exclude_names = toArray(JRequest::getVar('exclude_names', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML));
    $freq = JRequest::getVar('freq', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $modifyrobots = JRequest::getVar('robots', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $method = JRequest::getVar('method', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $level = JRequest::getVar('levels', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $maxcon = JRequest::getVar('maxcon', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $timeout = JRequest::getVar('timeout', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $whitelist = JRequest::getVar('whitelist', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);

    // Priorities are capped at 1.0.
    if ($priority >= 1) {
        $priority = "1.0";
    }

    // The config stores the values as entered, so it is generated before
    // $page_root is normalised below.
    $xmlconfig = genConfig($priority, $forbidden_types, $exclude_names, $freq, $method, $level, $maxcon, $sitemap_form, $page_root, $timeout);

    if (substr($page_root, -1) != "/") {
        $page_root = $page_root . "/";
    }

    // Drop the empty first entry left by an empty form field, if there is one.
    if (isset($exclude_names[0]) && $exclude_names[0] == "") {
        unset($exclude_names[0]);
    }

    // Add the Disallow rules of robots.txt to the exclusions. The pattern is
    // line-anchored and case-insensitive, so it also catches a rule on the last
    // line without a trailing newline and ignores commented-out rules and
    // trailing "# ..." comments.
    $robots = @JFile::read($page_root . 'robots.txt');
    if (is_string($robots) && preg_match_all('/^[ \t]*Disallow:[ \t]*([^#\r\n]*)/mi', $robots, $pos)) {
        foreach ($pos[1] as $disallow) {
            $disallow = trim($disallow);
            if ($disallow == "") {
                // An empty Disallow means "nothing is disallowed".
                continue;
            }
            if (strpos($disallow, $website) === false) {
                // $website already ends in "/", so strip the rule's leading
                // slash instead of producing "http://site//path".
                $path = ltrim($disallow, "/");
                if ($path == "") {
                    // A bare "/" would turn the site root itself into a
                    // forbidden string. Rules are not evaluated per User-agent
                    // here, so such a block-everything rule is skipped.
                    continue;
                }
                $disallow = $website . $path;
            }
            $exclude_names[] = $disallow;
        }
    }

    $forbidden_strings = array("print=1", "format=pdf", "option=com_mailto", "component/mailto", "/mailto/", "mailto:", "login", "register", "reset", "remind");
    foreach ($exclude_names as $name) {
        if ($name != "") {
            $forbidden_strings[] = $name;
        }
    }

    // Reset the global $stack before the crawl starts.
    $stack = array();
    $s = microtime(true);

    if ($whitelist == "yes") {
        AntiFloodControl($website);
    }

    $file = genSitemap($priority, getlinks($website, $forbidden_types, $level, $forbidden_strings, $method, $maxcon, $timeout), $freq, $website);
    writeXML($file, $sitemap_file, $option, $sitemap_url);
    // $page_root ends in "/" at this point, so no leading slash here.
    writeXML($xmlconfig, $page_root . "administrator/components/com_jcrawler/config.xml", $option, $sitemap_url);

    $mainframe->enqueueMessage("total time: " . round(microtime(true) - $s, 4) . " seconds");

    if ($modifyrobots == 1) {
        modifyrobots($sitemap_url, $page_root);
    }

    HTML_jcrawler::showNotifyForm($option, $sitemap_url);
}
/**
 * Print the page shown after a sitemap has been written.
 *
 * Outputs an "Options" box with helper links (validate / view the sitemap,
 * reference pages, donation link) and a form whose checkboxes carry the ping
 * URLs of the search engines; the form posts back to index.php with
 * task=notify. Finally calls HTML_jcrawler::footer().
 *
 * @param string $option      the component name, sent back as hidden field
 * @param string $sitemap_url absolute URL of the generated sitemap
 * @return void
 */
function showNotifyForm($option, $sitemap_url)
{
    // Both values end up inside HTML attributes, so they are escaped or
    // URL-encoded once here instead of being echoed raw.
    $sitemap_href = htmlspecialchars($sitemap_url, ENT_QUOTES, 'UTF-8');
    $sitemap_param = urlencode($sitemap_url);
    $option_attr = htmlspecialchars($option, ENT_QUOTES, 'UTF-8');
    ?>
<div style="position:absolute; left:450px; width:220px; float:left; clear:right;">
    <fieldset style="padding: 10; width:200px; border-color:#000099; border-width:2px; border-style:solid; ">
        <legend style="color:#000099;">Options</legend>
        <ul>
            <li><a href="http://www.w3.org/2001/03/webdata/xsv?docAddrs=<?php echo $sitemap_param; ?>&amp;warnings=on&amp;style=xsl" target="_blank">Validate my sitemap</a></li>
            <li><a href="<?php echo $sitemap_href; ?>" target="_blank">View my sitemap</a></li>
            <li><a href="http://en.wikipedia.org/wiki/List_of_HTTP_status_codes" target="_blank">List of HTTP status codes</a></li>
            <?php /* "&amp;" is required here: a raw "&currency_code" is decoded as the "&curren" entity. */ ?>
            <li><a href="https://www.paypal.com/cgi-bin/webscr?cmd=_donations&amp;business=patrick%40support%2dmasters%2ech&amp;item_name=Pixelschieber%20%2d%20JCrawler&amp;no_shipping=0&amp;no_note=1&amp;tax=0&amp;currency_code=EUR&amp;lc=CH&amp;bn=PP%2dDonationsBF&amp;charset=UTF%2d8" target="_blank">Donate via PayPal</a></li>
            <li><a href="http://www.google.com/support/webmasters/bin/topic.py?topic=8467" target="_blank">Official Sitemaps FAQ</a></li>
        </ul>
    </fieldset>
</div>
<div style="height:200px; width:300px; float:left;">
    <form action="index.php" method="post" name="Jcrawler" enctype="multipart/form-data">
        <fieldset style="padding: 10; width:300px; border-color:#000099; border-width:2px; border-style:solid; ">
            <legend style="color:#000099;"><b>Submit sitemap to</b></legend>
            <ul>
                <li><input type="checkbox" name="url[]" checked="checked" value="http://www.google.com/webmasters/sitemaps/ping?sitemap=<?php echo $sitemap_param; ?>" /> Google</li>
                <li><input type="checkbox" name="url[]" checked="checked" value="http://www.bing.com/webmaster/ping.aspx?siteMap=<?php echo $sitemap_param; ?>" /> Bing</li>
                <li><input type="checkbox" name="url[]" checked="checked" value="http://submissions.ask.com/ping?sitemap=<?php echo $sitemap_param; ?>" /> Ask.com</li>
                <br /><input type="Submit" value="Submit" name="submit">
            </ul>
        </fieldset>
        <input type="hidden" name="option" value="<?php echo $option_attr; ?>" />
        <input type="hidden" name="task" value="notify" />
        <input type="hidden" name="hidemainmenu" value="0" />
    </form>
</div>
    <?php
    HTML_jcrawler::footer($option);
}
/**
 * Main wrapper function for submit task
 * - empty the #__jcrawler_urls table
 * - get parameters from the form
 * - build the config XML (genConfig)
 * - merge the Disallow rules of robots.txt into the exclusion list
 * - call main crawling function (getLinks) to get all the links at once
 * - complete it with priority information (genSitemap)
 * - write the sitemap XML file and config.xml (writeXML)
 * - optionally call modifyrobots(), then show the notify form
 *
 * NOTE(review): document_root and sitemap_url are taken from the request and
 * decide where files get written - presumably this task is reachable by
 * trusted administrators only; confirm against the caller.
 *
 * @param string $option the component name
 * @return void
 */
function submit($option)
{
    // Plain assignment: objects are handles, and "=&" on a function result is
    // deprecated reference assignment.
    $db = JFactory::getDBO();
    $query = "TRUNCATE TABLE `#__jcrawler_urls`";
    $db->setQuery($query);
    $db->query();

    $app = JFactory::getApplication();

    // get parameters from gui of script
    if (!defined('HTTP_HOST')) {
        define('HTTP_HOST', JRequest::getVar('http_host', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML));
    }
    $website = HTTP_HOST;
    if (substr($website, -1) != "/") {
        $website = $website . "/";
    }
    $page_root = JRequest::getVar('document_root', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);

    // Read the sitemap location once and join it with exactly one slash, so
    // neither "root" . "sitemap.xml" nor "http://site/" . "/sitemap.xml"
    // can produce a glued or double-slashed result.
    $sitemap_form = JRequest::getVar('sitemap_url', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $sitemap_rel = ltrim($sitemap_form, "/");
    $sitemap_file = rtrim($page_root, "/") . "/" . $sitemap_rel;
    $sitemap_url = $website . $sitemap_rel;

    $priority = JRequest::getVar('priority', '1.0', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $forbidden_types = toTrimmedArray(JRequest::getVar('forbidden_types', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML));
    $exclude_names = toTrimmedArray(JRequest::getVar('exclude_names', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML));
    $freq = JRequest::getVar('freq', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $modifyrobots = JRequest::getVar('robots', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $method = JRequest::getVar('method', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $level = JRequest::getVar('levels', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $maxcon = JRequest::getVar('maxcon', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $timeout = JRequest::getVar('timeout', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $whitelist = JRequest::getVar('whitelist', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);

    // Priorities are capped at 1.0.
    if ($priority >= 1) {
        $priority = "1.0";
    }

    // The config stores the values as entered, so it is generated before
    // $page_root is normalised below.
    $xmlconfig = genConfig($priority, $forbidden_types, $exclude_names, $freq, $method, $level, $maxcon, $sitemap_form, $page_root, $timeout);

    if (substr($page_root, -1) != "/") {
        $page_root = $page_root . "/";
    }

    // Drop the empty first entry left by an empty form field, if there is one.
    if (isset($exclude_names[0]) && $exclude_names[0] == "") {
        unset($exclude_names[0]);
    }

    // Add the Disallow rules of robots.txt to the exclusions. The pattern is
    // line-anchored and case-insensitive, so it also catches a rule on the last
    // line without a trailing newline and ignores commented-out rules and
    // trailing "# ..." comments.
    $robots = @JFile::read($page_root . 'robots.txt');
    if (is_string($robots) && preg_match_all('/^[ \t]*Disallow:[ \t]*([^#\r\n]*)/mi', $robots, $pos)) {
        foreach ($pos[1] as $disallow) {
            $disallow = trim($disallow);
            if ($disallow == "") {
                // An empty Disallow means "nothing is disallowed".
                continue;
            }
            if (strpos($disallow, $website) === false) {
                // $website already ends in "/", so strip the rule's leading
                // slash instead of producing "http://site//path".
                $path = ltrim($disallow, "/");
                if ($path == "") {
                    // A bare "/" would turn the site root itself into a
                    // forbidden string. Rules are not evaluated per User-agent
                    // here, so such a block-everything rule is skipped.
                    continue;
                }
                $disallow = $website . $path;
            }
            $exclude_names[] = $disallow;
        }
    }

    $forbidden_strings = array("print=1", "format=pdf", "option=com_mailto", "component/mailto", "/mailto/", "mailto:", "login", "register", "reset", "remind");
    foreach ($exclude_names as $name) {
        if ($name != "") {
            $forbidden_strings[] = $name;
        }
    }

    $s = microtime(true);

    if ($whitelist == "yes") {
        AntiFloodControl($website);
    }

    $file = genSitemap($priority, getLinks($website, $forbidden_types, $level, $forbidden_strings, $method, $maxcon, $timeout), $freq, $website);
    writeXML($file, $sitemap_file, $option, $sitemap_url);
    // $page_root ends in "/" at this point, so no leading slash here.
    writeXML($xmlconfig, $page_root . "administrator/components/com_jcrawler/config.xml", $option, $sitemap_url);

    $app->enqueueMessage("total time: " . round(microtime(true) - $s, 4) . " seconds");

    if ($modifyrobots == 1) {
        modifyrobots($sitemap_url, $page_root);
    }

    // Load the admin HTML view class before using it.
    require_once JApplicationHelper::getPath('admin_html', 'com_jcrawler');
    HTML_jcrawler::showNotifyForm($option, $sitemap_url);
}