Пример #1
0
    /**
     * Produces a table with overview of the URLs to be crawled for each page
     *
     * Behaviour depends on the request parameters read at the top of the method:
     * - "_crawl":    the flag is forwarded to getPageTreeAndUrls() and a
     *                "invokeQueueChange" event is posted; a short confirmation is rendered.
     * - "_download": the collected URLs are sent as "CrawlerUrls.txt" and the script exits.
     * - otherwise:   the configuration selectors and the URL preview table are rendered.
     *
     * @return	string		HTML output
     */
    function drawURLs()
    {
        global $BE_USER;
        $lang = $GLOBALS['LANG'];
        $labelPrefix = 'LLL:EXT:crawler/modfunc1/locallang.xml:labels.';
        // Init:
        $this->duplicateTrack = array();
        $this->submitCrawlUrls = t3lib_div::_GP('_crawl');
        $this->downloadCrawlUrls = t3lib_div::_GP('_download');
        $this->makeCrawlerProcessableChecks();
        // Translate the requested schedule keyword into a timestamp; anything unknown means "now".
        switch ((string) t3lib_div::_GP('tstamp')) {
            case 'midnight':
                $this->scheduledTime = mktime(0, 0, 0);
                break;
            case '04:00':
                $this->scheduledTime = mktime(0, 0, 0) + 4 * 3600;
                break;
            case 'now':
            default:
                $this->scheduledTime = time();
                break;
        }
        // The per-minute rate used to be read from the "perminute" request parameter
        // (clamped to 1..10000); it is currently fixed.
        // TODO: check relevance
        $this->reqMinute = 1000;
        $selection = t3lib_div::_GP('configurationSelection');
        $this->incomingConfigurationSelection = is_array($selection) ? $selection : array('');
        $this->crawlerObj = t3lib_div::makeInstance('tx_crawler_lib');
        $this->crawlerObj->setAccessMode('gui');
        $this->crawlerObj->setID = t3lib_div::md5int(microtime());
        // "Nothing selected" is either an empty array or an array holding a single empty entry.
        $nothingSelected = empty($this->incomingConfigurationSelection)
            || (count($this->incomingConfigurationSelection) == 1 && empty($this->incomingConfigurationSelection[0]));
        if ($nothingSelected) {
            $code = '
			<tr>
				<td colspan="7"><b>' . $lang->sL($labelPrefix . 'noConfigSelected') . '</b></td>
			</tr>';
        } else {
            if ($this->submitCrawlUrls) {
                $reason = new tx_crawler_domain_reason();
                $reason->setReason(tx_crawler_domain_reason::REASON_GUI_SUBMIT);
                // Default to an empty name so the detail text never reads an undefined
                // variable when no regular backend user object is available.
                $username = '';
                if ($BE_USER instanceof t3lib_beUserAuth) {
                    $username = $BE_USER->user['username'];
                }
                $reason->setDetailText('The user ' . $username . ' added pages to the crawler queue manually ');
                // NOTE(review): findCrawler() is defined outside this block; presumably it
                // returns the crawler instance created above - verify.
                tx_crawler_domain_events_dispatcher::getInstance()->post('invokeQueueChange', $this->findCrawler()->setID, array('reason' => $reason));
            }
            $code = $this->crawlerObj->getPageTreeAndUrls(
                $this->pObj->id,
                $this->pObj->MOD_SETTINGS['depth'],
                $this->scheduledTime,
                $this->reqMinute,
                $this->submitCrawlUrls,
                $this->downloadCrawlUrls,
                array(),
                $this->incomingConfigurationSelection
            );
        }
        // Copy the results collected by the crawler object onto this module instance.
        $this->downloadUrls = $this->crawlerObj->downloadUrls;
        $this->duplicateTrack = $this->crawlerObj->duplicateTrack;
        $output = '';
        if ($code) {
            $urlCount = count($this->duplicateTrack);
            $output .= '<h3>' . $lang->sL($labelPrefix . 'configuration') . ':</h3>';
            $output .= '<input type="hidden" name="id" value="' . intval($this->pObj->id) . '" />';
            if (!$this->submitCrawlUrls) {
                // Preview mode: selectors, action buttons, counters and the URL table.
                $output .= $this->drawURLs_cfgSelectors() . '<br />';
                $output .= '<input type="submit" name="_update" value="' . $lang->sL($labelPrefix . 'triggerUpdate') . '" /> ';
                $output .= '<input type="submit" name="_crawl" value="' . $lang->sL($labelPrefix . 'triggerCrawl') . '" /> ';
                $output .= '<input type="submit" name="_download" value="' . $lang->sL($labelPrefix . 'triggerDownload') . '" /><br /><br />';
                $output .= $lang->sL($labelPrefix . 'count') . ': ' . $urlCount . '<br />';
                $output .= $lang->sL($labelPrefix . 'curtime') . ': ' . date('H:i:s') . '<br />';
                $output .= '<br />
					<table class="lrPadding c-list url-table">' . $this->drawURLs_printTableHeader() . $code . '</table>';
            } else {
                // Submit mode: only report how many URLs were handled and offer follow-up actions.
                $output .= $urlCount . ' ' . $lang->sL($labelPrefix . 'submitted') . '. <br /><br />';
                $output .= '<input type="submit" name="_" value="' . $lang->sL($labelPrefix . 'continue') . '" />';
                $output .= '<input type="submit" onclick="this.form.elements[\'SET[crawlaction]\'].value=\'log\';" value="' . $lang->sL($labelPrefix . 'continueinlog') . '" />';
            }
        }
        // Download Urls to crawl:
        if ($this->downloadCrawlUrls) {
            // Creating output header:
            $mimeType = 'application/octet-stream';
            header('Content-Type: ' . $mimeType);
            header('Content-Disposition: attachment; filename=CrawlerUrls.txt');
            // Printing the URLs, one per line (CRLF separated):
            echo implode(chr(13) . chr(10), $this->downloadUrls);
            // Exits: the HTML built above is intentionally discarded in download mode.
            exit;
        }
        // Return output:
        return $output;
    }
Пример #2
0
 /**
  * Function executed by crawler_im.php cli script.
  *
  * Reads the page id and the options -o (output mode), -d (depth), -n (number
  * passed on as the per-minute value) and -proc (comma separated processing
  * instructions) from the cli object, collects the URLs through
  * getPageTreeAndUrls() and then, depending on -o, prints them ("url"),
  * executes them right away ("exec"), reports them as queued ("queue") or
  * merely lists them (any other value).
  *
  * @return	void
  */
 function CLI_main_im()
 {
     $this->setAccessMode('cli_im');
     $cliObj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('tx_crawler_cli_im');
     // Force user to admin state and set workspace to "Live":
     $this->backendUser->user['admin'] = 1;
     $this->backendUser->setWorkspace(0);
     // Print help when no first positional argument was given:
     if (!isset($cliObj->cli_args['_DEFAULT'][1])) {
         $cliObj->cli_validateArgs();
         $cliObj->cli_help();
         exit;
     }
     $cliObj->cli_validateArgs();
     // The output mode drives most of the branching below, so look it up once.
     $outputMode = $cliObj->cli_argValue('-o');
     $submitsToQueue = $outputMode === 'queue' || $outputMode === 'exec';
     if ($outputMode === 'exec') {
         $this->registerQueueEntriesInternallyOnly = TRUE;
     }
     if (isset($cliObj->cli_args['_DEFAULT'][2])) {
         // Crawler is called over TYPO3 BE
         $pageId = tx_crawler_api::forceIntegerInRange($cliObj->cli_args['_DEFAULT'][2], 0);
     } else {
         // Crawler is called over cli
         $pageId = tx_crawler_api::forceIntegerInRange($cliObj->cli_args['_DEFAULT'][1], 0);
     }
     // Fall back to every configuration found for the page when none was given explicitly.
     $configurationKeys = $this->getConfigurationKeys($cliObj);
     if (!is_array($configurationKeys)) {
         $configurations = $this->getUrlsForPageId($pageId);
         $configurationKeys = is_array($configurations) ? array_keys($configurations) : array();
     }
     if ($submitsToQueue) {
         $reason = new tx_crawler_domain_reason();
         $reason->setReason(tx_crawler_domain_reason::REASON_GUI_SUBMIT);
         $reason->setDetailText('The cli script of the crawler added to the queue');
         tx_crawler_domain_events_dispatcher::getInstance()->post('invokeQueueChange', $this->setID, array('reason' => $reason));
     }
     if ($this->extensionSettings['cleanUpOldQueueEntries']) {
         $this->cleanUpOldQueueEntries();
     }
     // NOTE(review): the "invokeQueueChange" event above is posted with the set id that
     // was current BEFORE this reassignment - confirm that this ordering is intended.
     $this->setID = \TYPO3\CMS\Core\Utility\GeneralUtility::md5int(microtime());
     $depth = tx_crawler_api::forceIntegerInRange($cliObj->cli_argValue('-d'), 0, 99);
     $scheduledTime = $this->getCurrentTime();
     $perMinute = tx_crawler_api::forceIntegerInRange($cliObj->cli_isArg('-n') ? $cliObj->cli_argValue('-n') : 30, 1, 1000);
     $procInstructionFilter = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',', $cliObj->cli_argValue('-proc'), 1);
     $this->getPageTreeAndUrls(
         $pageId,
         $depth,
         $scheduledTime,
         $perMinute,
         $submitsToQueue,
         $outputMode === 'url',
         $procInstructionFilter,
         $configurationKeys
     );
     if ($outputMode === 'url') {
         $cliObj->cli_echo(implode(chr(10), $this->downloadUrls) . chr(10), 1);
     } elseif ($outputMode === 'exec') {
         $cliObj->cli_echo("Executing " . count($this->urlList) . " requests right away:\n\n");
         $cliObj->cli_echo(implode(chr(10), $this->urlList) . chr(10));
         $cliObj->cli_echo("\nProcessing:\n");
         foreach ($this->queueEntries as $queueRec) {
             // Guard against parameters that fail to unserialize or lack the expected keys,
             // so that one bad entry cannot break the whole run.
             $parameters = unserialize($queueRec['parameters']);
             if (!is_array($parameters)) {
                 $parameters = array();
             }
             $url = isset($parameters['url']) ? $parameters['url'] : '';
             $procInstructions = isset($parameters['procInstructions']) && is_array($parameters['procInstructions'])
                 ? $parameters['procInstructions']
                 : array();
             $cliObj->cli_echo($url . ' (' . implode(',', $procInstructions) . ') => ');
             $result = $this->readUrlFromArray($queueRec);
             // NOTE(review): unserialize() is applied to content returned by readUrlFromArray();
             // if that content can come from an untrusted source this is unsafe - verify.
             $requestResult = unserialize($result['content']);
             if (is_array($requestResult)) {
                 // Print each log line on its own line, indented by two tabs.
                 $logIndent = chr(10) . chr(9) . chr(9);
                 $resLog = isset($requestResult['log']) && is_array($requestResult['log'])
                     ? $logIndent . implode($logIndent, $requestResult['log'])
                     : '';
                 $cliObj->cli_echo('OK: ' . $resLog . chr(10));
             } else {
                 // Not a serialized array: show a whitespace-collapsed, tag-free excerpt instead.
                 $excerpt = substr(preg_replace('/\\s+/', ' ', strip_tags($result['content'])), 0, 30000);
                 $cliObj->cli_echo('Error checking Crawler Result: ' . $excerpt . '...' . chr(10));
             }
         }
     } elseif ($outputMode === 'queue') {
         $cliObj->cli_echo("Putting " . count($this->urlList) . " entries in queue:\n\n");
         $cliObj->cli_echo(implode(chr(10), $this->urlList) . chr(10));
     } else {
         $cliObj->cli_echo(count($this->urlList) . " entries found for processing. (Use -o to decide action):\n\n", 1);
         $cliObj->cli_echo(implode(chr(10), $this->urlList) . chr(10), 1);
     }
 }