コード例 #1
0
ファイル: class.tx_crawler_lib.php プロジェクト: b13/crawler
    /**
     * Running the functionality of the CLI (crawling URLs from queue)
     *
     * Steps, in order: run the CLI hooks, purge old queue rows if 'purgeQueueDays' is
     * configured, select up to $countInARun due queue entries, reserve them for this
     * process inside a transaction, then call readUrl() for every reserved entry.
     *
     * @param  int $countInARun      Maximum number of queue entries selected for this run
     * @param  int $sleepTime        Pause after each entry; passed to usleep() (microseconds)
     * @param  int $sleepAfterFinish Pause after the batch; passed to sleep() (seconds)
     * @return int                   Bitmask: OR-ed results of readUrl(), combined with
     *                               self::CLI_STATUS_PROCESSED and/or self::CLI_STATUS_ABORTED
     */
    public function CLI_run($countInARun, $sleepTime, $sleepAfterFinish)
    {
        $result = 0;
        $counter = 0;
        // First, run hooks:
        $this->CLI_runHooks();
        // Clean up the queue: delete rows with a non-zero exec_time older than the cutoff
        $purgeQueueDays = intval($this->extensionSettings['purgeQueueDays']);
        if ($purgeQueueDays > 0) {
            $purgeDate = $this->getCurrentTime() - 24 * 60 * 60 * $purgeQueueDays;
            $this->db->exec_DELETEquery('tx_crawler_queue', 'exec_time!=0 AND exec_time<' . $purgeDate);
        }
        // Select entries:
        //TODO Shouldn't this reside within the transaction?
        $rows = $this->db->exec_SELECTgetRows('qid,scheduled', 'tx_crawler_queue', 'exec_time=0
				AND process_scheduled= 0
				AND scheduled<=' . $this->getCurrentTime(), '', 'scheduled, qid', intval($countInARun));
        // The select may yield a non-array (e.g. NULL when the query fails); count() on such a
        // value raises a TypeError on PHP 8, so it is handled like an empty queue instead.
        if (is_array($rows) && count($rows) > 0) {
            $quidList = array();
            foreach ($rows as $r) {
                // Cast to int because the list is concatenated into the SQL statement below
                $quidList[] = intval($r['qid']);
            }
            $processId = $this->CLI_buildProcessId();
            // Reserve queue entries for this process
            $this->db->sql_query('BEGIN');
            //TODO make sure we're not taking assigned queue-entries
            $this->db->exec_UPDATEquery('tx_crawler_queue', 'qid IN (' . implode(',', $quidList) . ')', array('process_scheduled' => intval($this->getCurrentTime()), 'process_id' => $processId));
            // Save the number of assigned queue entries to determine how many have been processed later
            $numberOfAffectedRows = $this->db->sql_affected_rows();
            $this->db->exec_UPDATEquery('tx_crawler_process', "process_id = '" . $processId . "'", array('assigned_items_count' => intval($numberOfAffectedRows)));
            if ($numberOfAffectedRows == count($quidList)) {
                $this->db->sql_query('COMMIT');
            } else {
                // Fewer rows were updated than selected: give up the reservation entirely
                $this->db->sql_query('ROLLBACK');
                $this->CLI_debug("Nothing processed due to multi-process collision (" . $this->CLI_buildProcessId() . ")");
                return $result | self::CLI_STATUS_ABORTED;
            }
            foreach ($rows as $r) {
                $result |= $this->readUrl($r['qid']);
                $counter++;
                // Just to relax the system
                usleep(intval($sleepTime));
                // If the CLI has been disabled between the start and the current readUrl() call,
                // return immediately; the process is deliberately NOT marked as ended.
                if ($this->getDisabled()) {
                    return $result | self::CLI_STATUS_ABORTED;
                }
                if (!$this->CLI_checkIfProcessIsActive($this->CLI_buildProcessId())) {
                    // Possible timeout
                    $this->CLI_debug("conflict / timeout (" . $this->CLI_buildProcessId() . ")");
                    //TODO might need an additional returncode
                    $result |= self::CLI_STATUS_ABORTED;
                    break;
                }
            }
            sleep(intval($sleepAfterFinish));
            $msg = 'Rows: ' . $counter;
            $this->CLI_debug($msg . " (" . $this->CLI_buildProcessId() . ")");
        } else {
            $this->CLI_debug("Nothing within queue which needs to be processed (" . $this->CLI_buildProcessId() . ")");
        }
        if ($counter > 0) {
            $result |= self::CLI_STATUS_PROCESSED;
        }
        return $result;
    }