/**
 * Runs one CLI crawl pass: executes the CLI hooks, purges old executed queue
 * entries, claims up to $countInARun due queue entries for this process and
 * crawls each of them via readUrl().
 *
 * @param int $countInARun Maximum number of queue entries to claim and process in this run
 * @param int $sleepTime Pause after each processed entry, in microseconds (passed to usleep())
 * @param int $sleepAfterFinish Pause after the batch has been worked through, in seconds (passed to sleep())
 * @return int Bitmask: the OR-ed return values of readUrl() combined with the
 *             CLI_STATUS_ABORTED / CLI_STATUS_PROCESSED flags of this class
 */
public function CLI_run($countInARun, $sleepTime, $sleepAfterFinish)
{
    $result = 0;
    $counter = 0;

    // First, run hooks:
    $this->CLI_runHooks();

    // Clean up the queue: remove already executed entries older than the configured number of days.
    $purgeQueueDays = intval($this->extensionSettings['purgeQueueDays']);
    if ($purgeQueueDays > 0) {
        $purgeDate = $this->getCurrentTime() - 24 * 60 * 60 * $purgeQueueDays;
        $this->db->exec_DELETEquery('tx_crawler_queue', 'exec_time!=0 AND exec_time<' . $purgeDate);
    }

    // Select entries that are due, not yet executed and not yet assigned to a process.
    // The selection happens outside the transaction; the UPDATE below re-checks the
    // same conditions so that a concurrent claim is detected instead of overwritten.
    $rows = $this->db->exec_SELECTgetRows(
        'qid,scheduled',
        'tx_crawler_queue',
        'exec_time=0 AND process_scheduled= 0 AND scheduled<=' . $this->getCurrentTime(),
        '',
        'scheduled, qid',
        intval($countInARun)
    );

    // A failed select does not yield an array; treat it like an empty queue
    // instead of passing a non-countable value to count().
    if (!is_array($rows) || count($rows) === 0) {
        $this->CLI_debug("Nothing within queue which needs to be processed (" . $this->CLI_buildProcessId() . ")");
        return $result;
    }

    $quidList = array();
    foreach ($rows as $r) {
        // qid is interpolated into the IN (...) list below, so force it to an integer.
        $quidList[] = intval($r['qid']);
    }

    $processId = $this->CLI_buildProcessId();

    // Reserve queue entries for this process.
    $this->db->sql_query('BEGIN');
    // Only claim entries that are still unassigned and unexecuted. If another process
    // grabbed some of them since the SELECT, fewer rows are affected and the
    // collision check below rolls the reservation back.
    $this->db->exec_UPDATEquery(
        'tx_crawler_queue',
        'qid IN (' . implode(',', $quidList) . ') AND exec_time=0 AND process_scheduled=0',
        array(
            'process_scheduled' => intval($this->getCurrentTime()),
            'process_id' => $processId
        )
    );

    // Save the number of assigned queue entries to determine how many have been processed later.
    // Must be read directly after the queue UPDATE, before the next query runs.
    $numberOfAffectedRows = $this->db->sql_affected_rows();
    $this->db->exec_UPDATEquery(
        'tx_crawler_process',
        "process_id = '" . $processId . "'",
        array('assigned_items_count' => intval($numberOfAffectedRows))
    );

    if ($numberOfAffectedRows == count($quidList)) {
        $this->db->sql_query('COMMIT');
    } else {
        // Not every selected entry could be claimed: give all of them back and abort this run.
        $this->db->sql_query('ROLLBACK');
        $this->CLI_debug("Nothing processed due to multi-process collision (" . $this->CLI_buildProcessId() . ")");
        return $result | self::CLI_STATUS_ABORTED;
    }

    foreach ($rows as $r) {
        $result |= $this->readUrl($r['qid']);
        $counter++;

        // Just to relax the system
        usleep(intval($sleepTime));

        // If the CLI has been disabled between the start and the current readUrl() call,
        // return immediately; the process is NOT marked as ended and the
        // after-finish sleep is skipped.
        if ($this->getDisabled()) {
            return $result | self::CLI_STATUS_ABORTED;
        }

        if (!$this->CLI_checkIfProcessIsActive($this->CLI_buildProcessId())) {
            $this->CLI_debug("conflict / timeout (" . $this->CLI_buildProcessId() . ")");

            //TODO might need an additional returncode
            $result |= self::CLI_STATUS_ABORTED;
            // Possible timeout: stop working on the remaining entries.
            break;
        }
    }

    sleep(intval($sleepAfterFinish));

    $msg = 'Rows: ' . $counter;
    $this->CLI_debug($msg . " (" . $this->CLI_buildProcessId() . ")");

    if ($counter > 0) {
        $result |= self::CLI_STATUS_PROCESSED;
    }

    return $result;
}