Пример #1
0
/**
 * Handle the "submit" task: read the crawler settings from the POSTed form,
 * crawl the site, and write both the sitemap and the component config file.
 *
 * Sequence:
 * - read the form parameters
 * - build the config XML via genConfig()
 * - merge the Disallow rules of robots.txt into the user-supplied exclusions
 * - crawl with getlinks() and turn the result into XML with genSitemap()
 * - write sitemap and config via writeXML()
 * - optionally call modifyrobots(), then show the notify form
 *
 * NOTE(review): 'document_root' and 'sitemap_url' come straight from POST and
 * decide where files are written; confirm the task is restricted to trusted
 * administrators.
 *
 * @param   string $option  the component name, passed through to writeXML() and the notify form
 * @return  void
 */
function submit($option)
{
    global $stack, $mainframe;

    // get values from gui of script
    $website = JRequest::getVar('http_host', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    if (substr($website, -1) != "/") {
        $website = $website . "/";
    }
    $page_root = JRequest::getVar('document_root', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);

    // The sitemap location is read once and reused for the file path, the
    // public URL and the value stored in the config.
    $sitemap_form = JRequest::getVar('sitemap_url', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $sitemap_file = $page_root . $sitemap_form;
    $sitemap_url = $website . $sitemap_form;

    $priority = JRequest::getVar('priority', '1.0', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $forbidden_types = toArray(JRequest::getVar('forbidden_types', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML));
    $exclude_names = toArray(JRequest::getVar('exclude_names', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML));
    $freq = JRequest::getVar('freq', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $modifyrobots = JRequest::getVar('robots', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $method = JRequest::getVar('method', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $level = JRequest::getVar('levels', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $maxcon = JRequest::getVar('maxcon', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $timeout = JRequest::getVar('timeout', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $whitelist = JRequest::getVar('whitelist', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);

    // Cap the priority at "1.0".
    if ($priority >= 1) {
        $priority = "1.0";
    }

    // The config is generated from $page_root exactly as submitted, i.e. before
    // the trailing slash is appended below.
    $xmlconfig = genConfig($priority, $forbidden_types, $exclude_names, $freq, $method, $level, $maxcon, $sitemap_form, $page_root, $timeout);

    if (substr($page_root, -1) != "/") {
        $page_root = $page_root . "/";
    }

    // The read is error-suppressed, so treat any non-string result (e.g. a
    // missing robots.txt) as an empty file instead of feeding it to the regex.
    $robots = @JFile::read($page_root . 'robots.txt');
    if (!is_string($robots)) {
        $robots = '';
    }
    // "." never matches a newline, so "(.*)" captures the rest of the line and
    // also catches a rule on the last line of a file without a final newline.
    preg_match_all("/Disallow:(.*)/", $robots, $pos);

    // Drop a leading empty entry; the index is checked first because the
    // list may be empty.
    if (isset($exclude_names[0]) && $exclude_names[0] == "") {
        unset($exclude_names[0]);
    }

    foreach ($pos[1] as $disallow) {
        $disallow = trim($disallow);
        if ($disallow === "") {
            // An empty "Disallow:" blocks nothing; prefixing it would add the
            // site root itself to the exclusion list.
            continue;
        }
        if (strpos($disallow, $website) === false) {
            // NOTE(review): $website ends with "/" and robots paths usually
            // start with "/", so this yields "//" - confirm getlinks() expects that.
            $disallow = $website . $disallow;
        }
        $exclude_names[] = $disallow;
    }

    $forbidden_strings = array("print=1", "format=pdf", "option=com_mailto", "component/mailto", "/mailto/", "mailto:", "login", "register", "reset", "remind");
    foreach ($exclude_names as $name) {
        if ($name != "") {
            $forbidden_strings[] = $name;
        }
    }

    // Reset the global $stack before crawling.
    $stack = array();
    $s = microtime(true);
    if ($whitelist == "yes") {
        AntiFloodControl($website);
    }

    $file = genSitemap($priority, getlinks($website, $forbidden_types, $level, $forbidden_strings, $method, $maxcon, $timeout), $freq, $website);
    writeXML($file, $sitemap_file, $option, $sitemap_url);
    // $page_root already ends with "/", so no leading slash is added here.
    writeXML($xmlconfig, $page_root . "administrator/components/com_jcrawler/config.xml", $option, $sitemap_url);
    $mainframe->enqueueMessage("total time: " . round(microtime(true) - $s, 4) . " seconds");

    if ($modifyrobots == 1) {
        modifyrobots($sitemap_url, $page_root);
    }
    HTML_jcrawler::showNotifyForm($option, $sitemap_url);
}
Пример #2
0
    /**
     * Render the page shown after a sitemap has been generated: an "Options"
     * box with helper links and a form for pinging search engines with the
     * sitemap URL, followed by the component footer.
     *
     * Both arguments are written into HTML attributes, so they are escaped
     * (or URL-encoded when embedded as a query-string value) before output.
     *
     * @param   string $option       the component name, written into the hidden "option" field
     * @param   string $sitemap_url  public URL of the generated sitemap
     * @return  void
     */
    function showNotifyForm($option, $sitemap_url)
    {
        // Prepare each value once for the context it is printed in.
        $sitemap_href = htmlspecialchars($sitemap_url, ENT_QUOTES, 'UTF-8');
        $sitemap_param = urlencode($sitemap_url);
        $option_attr = htmlspecialchars($option, ENT_QUOTES, 'UTF-8');
        ?>
	        <div style="position:absolute; left:450px; width:220px; float:left; clear:right;">
<fieldset style="padding: 10; width:200px; border-color:#000099; border-width:2px; border-style:solid; ">
            	<legend style="color:#000099;">Options</legend>
        		<ul><li><a href="http://www.w3.org/2001/03/webdata/xsv?docAddrs=<?php echo $sitemap_param; ?>&amp;warnings=on&amp;style=xsl" target="_blank">Validate my sitemap</a></li>
					<li><a href="<?php echo $sitemap_href; ?>" target="_blank">View my sitemap</a></li>
                    <li><a href="http://en.wikipedia.org/wiki/List_of_HTTP_status_codes" target="_blank">List of HTTP status codes</a></li>
                    <li><a href="https://www.paypal.com/cgi-bin/webscr?cmd=_donations&amp;business=patrick%40support%2dmasters%2ech&amp;item_name=Pixelschieber%20%2d%20JCrawler&amp;no_shipping=0&amp;no_note=1&amp;tax=0&amp;currency_code=EUR&amp;lc=CH&amp;bn=PP%2dDonationsBF&amp;charset=UTF%2d8" target="_blank">Donate via PayPal</a></li>
                    <li><a href="http://www.google.com/support/webmasters/bin/topic.py?topic=8467" target="_blank">Official Sitemaps FAQ</a></li>
  </ul>
       	  </fieldset>
        </div>
	<div style="height:200px; width:300px; float:left;">
<form action="index.php" method="post" name="Jcrawler" enctype="multipart/form-data">
	<fieldset style="padding: 10; width:300px; border-color:#000099; border-width:2px; border-style:solid; ">
	<legend style="color:#000099;"><b>Submit sitemap to</b></legend>	
	<ul>
<li><input type="checkbox" name="url[]" checked="checked" value="http://www.google.com/webmasters/sitemaps/ping?sitemap=<?php echo $sitemap_param; ?>" /> Google</li>
<li><input type="checkbox" name="url[]" checked="checked" value="http://www.bing.com/webmaster/ping.aspx?siteMap=<?php echo $sitemap_param; ?>" /> Bing</li>
<li><input type="checkbox" name="url[]" checked="checked" value="http://submissions.ask.com/ping?sitemap=<?php echo $sitemap_param; ?>" /> Ask.com</li>
                                                
         <br /><input type="Submit" value="Submit" name="submit"></ul>	</fieldset>
	
		<input type="hidden" name="option" value="<?php echo $option_attr; ?>" />
		<input type="hidden" name="task" value="notify" />
		<input type="hidden" name="hidemainmenu" value="0" />
		<!-- <input type="hidden" name="client" value="" /-->
		</form>
	</div>
    
 <?php 
        HTML_jcrawler::footer($option);
    }
Пример #3
0
/**
 * Main wrapper function for the submit task.
 *
 * - empty the `#__jcrawler_urls` table
 * - get parameters from the form
 * - build the config XML via genConfig()
 * - merge the Disallow rules of robots.txt into the user-supplied exclusions
 * - call the main crawling function (getLinks) to get all the links at once
 * - complete it with priority information and generate the sitemap XML (genSitemap)
 * - write the sitemap and the config file (writeXML)
 * - optionally call modifyrobots(), then show the notify form
 *
 * NOTE(review): 'document_root' and 'sitemap_url' come straight from POST and
 * decide where files are written; confirm the task is restricted to trusted
 * administrators.
 *
 * @param   string $option  the component name
 * @return  void
 */
function submit($option)
{
    // Start from an empty URL table on every run.
    $db =& JFactory::getDBO();
    $query = "TRUNCATE TABLE `#__jcrawler_urls`";
    $db->setQuery($query);
    $db->query();
    $app =& JFactory::getApplication();

    // get parameters from gui of script
    // The host is stored in a constant on first use so that it is defined only once.
    if (!defined('HTTP_HOST')) {
        define('HTTP_HOST', JRequest::getVar('http_host', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML));
    }
    $website = HTTP_HOST;
    if (substr($website, -1) != "/") {
        $website = $website . "/";
    }
    $page_root = JRequest::getVar('document_root', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);

    // The sitemap location is read once and reused for the file path, the
    // public URL and the value stored in the config.
    $sitemap_form = JRequest::getVar('sitemap_url', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $sitemap_file = $page_root . $sitemap_form;
    $sitemap_url = $website . $sitemap_form;

    $priority = JRequest::getVar('priority', '1.0', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $forbidden_types = toTrimmedArray(JRequest::getVar('forbidden_types', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML));
    $exclude_names = toTrimmedArray(JRequest::getVar('exclude_names', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML));
    $freq = JRequest::getVar('freq', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $modifyrobots = JRequest::getVar('robots', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $method = JRequest::getVar('method', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $level = JRequest::getVar('levels', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $maxcon = JRequest::getVar('maxcon', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $timeout = JRequest::getVar('timeout', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);
    $whitelist = JRequest::getVar('whitelist', 'none', 'POST', 'STRING', JREQUEST_ALLOWHTML);

    // Cap the priority at "1.0".
    if ($priority >= 1) {
        $priority = "1.0";
    }

    // The config is generated from $page_root exactly as submitted, i.e. before
    // the trailing slash is appended below.
    $xmlconfig = genConfig($priority, $forbidden_types, $exclude_names, $freq, $method, $level, $maxcon, $sitemap_form, $page_root, $timeout);

    if (substr($page_root, -1) != "/") {
        $page_root = $page_root . "/";
    }

    // The read is error-suppressed, so treat any non-string result (e.g. a
    // missing robots.txt) as an empty file instead of feeding it to the regex.
    $robots = @JFile::read($page_root . 'robots.txt');
    if (!is_string($robots)) {
        $robots = '';
    }
    // "." never matches a newline, so "(.*)" captures the rest of the line and
    // also catches a rule on the last line of a file without a final newline.
    preg_match_all("/Disallow:(.*)/", $robots, $pos);

    // Drop a leading empty entry; the index is checked first because the
    // list may be empty.
    if (isset($exclude_names[0]) && $exclude_names[0] == "") {
        unset($exclude_names[0]);
    }

    foreach ($pos[1] as $disallow) {
        $disallow = trim($disallow);
        if ($disallow === "") {
            // An empty "Disallow:" blocks nothing; prefixing it would add the
            // site root itself to the exclusion list.
            continue;
        }
        if (strpos($disallow, $website) === false) {
            // NOTE(review): $website ends with "/" and robots paths usually
            // start with "/", so this yields "//" - confirm getLinks() expects that.
            $disallow = $website . $disallow;
        }
        $exclude_names[] = $disallow;
    }

    $forbidden_strings = array("print=1", "format=pdf", "option=com_mailto", "component/mailto", "/mailto/", "mailto:", "login", "register", "reset", "remind");
    foreach ($exclude_names as $name) {
        if ($name != "") {
            $forbidden_strings[] = $name;
        }
    }

    $s = microtime(true);
    if ($whitelist == "yes") {
        AntiFloodControl($website);
    }

    $file = genSitemap($priority, getLinks($website, $forbidden_types, $level, $forbidden_strings, $method, $maxcon, $timeout), $freq, $website);
    writeXML($file, $sitemap_file, $option, $sitemap_url);
    // $page_root already ends with "/", so no leading slash is added here.
    writeXML($xmlconfig, $page_root . "administrator/components/com_jcrawler/config.xml", $option, $sitemap_url);
    $app->enqueueMessage("total time: " . round(microtime(true) - $s, 4) . " seconds");

    if ($modifyrobots == 1) {
        modifyrobots($sitemap_url, $page_root);
    }

    // Load the admin HTML class before rendering the notify form.
    require_once JApplicationHelper::getPath('admin_html', 'com_jcrawler');
    HTML_jcrawler::showNotifyForm($option, $sitemap_url);
}