/**
 * Crawl every LinkCheckSite, turn each HTML report into a PDF and email it.
 *
 * For each site the Java crawler is executed and writes an HTML report into
 * a per-site directory below "silverstripe-linkcheck/runs/". Each report is
 * then converted to a PDF under assets/LinkCheck/<SiteName>/, recorded as a
 * File and a LinkCheckRun attached to the site, emailed to every recipient
 * of the site, and finally the intermediate HTML report is deleted.
 *
 * @throws Exception If wkHtmlToPdfPath or emailAddress is not configured.
 */
public function process()
{
    self::$config = $this->config();

    if (!self::$config->wkHtmlToPdfPath) {
        throw new Exception("You must provide a path for WkHtmlToPdf in your sites configuration.");
    }

    if (!self::$config->emailAddress) {
        throw new Exception("You must provide an email address to send from in your sites configuration.");
    }

    increase_memory_limit_to('1024M');
    set_time_limit(0);

    $sites = LinkCheckSite::get();
    $outputDir = BASE_PATH . DIRECTORY_SEPARATOR . "silverstripe-linkcheck/runs/";
    $filesCreated = array();

    // build the crawler
    chdir(__DIR__ . "/../thirdparty");
    exec("javac " . self::$crawler . " " . self::$linkStats . " && " . "javac " . self::$linkProject);

    if (!$sites) {
        return;
    }

    foreach ($sites as $site) {
        echo "Checking " . $site->SiteURL . "\r\n";
        $url = $site->SiteURL;

        // Per-site output directory, named after the URL without its scheme.
        // Both http:// and https:// are stripped so that secure sites do not
        // end up with a "https:" path segment that cannot be created.
        $siteDir = $outputDir . preg_replace('#^https?://#i', '', $url);

        // if the output directory doesn't exist for the run, create it
        // (recursively, so a missing "runs" parent is created as well)
        if (!file_exists($siteDir)) {
            mkdir($siteDir, 0777, true);
        }

        $filename = date("Y-m-d") . '-' . rand(0, 1000) . ".html";
        $filepath = $siteDir . '/';

        // execute the crawler; the URL and the report path are escaped because
        // they are data (the URL comes from a LinkCheckSite record) and must
        // never be interpreted by the shell
        exec(
            "java Project " . escapeshellarg($url) . " "
            . escapeshellarg($filepath . $filename) . " 10 1000"
        );

        // Always provide an 'Email' list, even when the site has no
        // recipients, so the mailing loop below can iterate safely.
        $recipientEmails = array();
        $emailRecipients = $site->EmailRecipients();
        if ($emailRecipients) {
            foreach ($emailRecipients as $recipient) {
                $recipientEmails[] = $recipient->Email;
            }
        }

        $filesCreated[$site->ID] = array(
            'FilePath' => $filepath,
            'FileName' => $filename,
            'SiteName' => $site->SiteName,
            'ID'       => $site->ID,
            'URL'      => $url,
            'Email'    => $recipientEmails,
        );
    }

    foreach ($filesCreated as $file) {
        $reportFile = $file['FilePath'] . $file['FileName'];

        // The crawler may have failed without writing a report; there is
        // nothing to convert, record or mail in that case.
        if (!is_file($reportFile)) {
            echo "No report was generated for " . $file['URL'] . ", skipping.\r\n";
            continue;
        }

        Folder::find_or_make("LinkCheck" . DIRECTORY_SEPARATOR . $file['SiteName'] . DIRECTORY_SEPARATOR);

        $pdfPath = "assets" . DIRECTORY_SEPARATOR . "LinkCheck" . DIRECTORY_SEPARATOR
            . $file['SiteName'] . DIRECTORY_SEPARATOR;
        $pdfFullPath = BASE_PATH . DIRECTORY_SEPARATOR . $pdfPath;
        $pdfName = str_replace("html", "pdf", $file['FileName']);

        // convert the HTML report into a PDF inside the assets folder
        $generator = new WkHtml\Generator(
            new \Knp\Snappy\Pdf(self::$config->wkHtmlToPdfPath),
            new WkHtml\Input\String(file_get_contents($reportFile)),
            new WkHtml\Output\File($pdfFullPath . $pdfName, 'application/pdf')
        );
        $generator->process();

        $site = LinkCheckSite::get()->byID($file['ID']);

        // register the PDF as a File record
        $pdfUpload = new File();
        $pdfUpload->Title = $file['SiteName'] . '-' . $pdfName;
        $pdfUpload->Filename = $pdfPath . $pdfName;
        $pdfUpload->write();

        // record the run and link it to the site
        $linkCheckRun = new LinkCheckRun();
        $linkCheckRun->LinkCheckFileID = $pdfUpload->ID;
        $linkCheckRun->LinkCheckSiteID = $site->ID;
        $linkCheckRun->write();

        $site->LinkCheckRuns()->add($linkCheckRun);

        foreach ($file['Email'] as $emailAddress) {
            $email = new Email();
            $email->to = $emailAddress;
            $email->from = self::$config->emailAddress;
            $email->subject = $file['SiteName'] . " link check run";
            $email->body = "Site Link Check Run for {$file['URL']} on " . date("Y/m/d");
            $email->attachFile($pdfPath . $pdfName, "linkcheck.pdf");
            $email->send();
        }

        // the HTML report is only an intermediate artefact
        unlink($reportFile);
    }
}
/**
 * Run the LinkCheckTask.
 *
 * Checks the links of every SiteTree page (except classes listed in
 * self::$exempt_classes) with a LinkCheckProcessor, tallies the results by
 * HTTP status class, stores a BrokenLink record for every 400-599 result and
 * marks the LinkCheckRun as complete. Progress and result messages are
 * echoed unless the request is AJAX or a SapphireTest is running.
 *
 * @todo Split functionality to separate methods
 *
 * @return array|false|null False when the LinkCheckRun table is missing,
 *                          another run is still incomplete or there are no
 *                          pages; an array with 'Date' and 'LinkCheckRunID'
 *                          for AJAX requests; null otherwise.
 */
public function process()
{
    // SapphireTest is only consulted when it is already loaded
    // (class_exists() without autoloading). When it is not loaded no test
    // can be running, so messages must still be shown in that case.
    $runningTest = class_exists('SapphireTest', false) && SapphireTest::is_running_test();
    $isAjax = Director::is_ajax();
    $showMessages = !$runningTest && !$isAjax;

    if ($showMessages) {
        echo "\r\n";
    }

    if (!ClassInfo::hasTable('LinkCheckRun')) {
        if ($showMessages) {
            echo "Database has not been built. Please run dev/build first!\r\n";
        }
        return false;
    }

    // If there is already a LinkCheckRun that exists and is not complete,
    // don't allow a new run as it could run the server to the ground!
    // @todo we probably want some system that allows cancelling a check halfway through
    if (DataObject::get_one('LinkCheckRun', "\"IsComplete\" = 0")) {
        if ($showMessages) {
            echo "There is already a link check running at the moment. Please wait for it to complete before starting a new one.\r\n";
        }
        return false;
    }

    set_time_limit(0);
    ini_set('max_execution_time', 0);

    $goodLinks = 0;   // 200-299 HTTP status codes
    $checkLinks = 0;  // 300-399 HTTP status codes
    $brokenLinks = 0; // 400-599 HTTP status codes

    $pages = DataObject::get('SiteTree');
    if (!$pages) {
        return false;
    }

    // We have started a new run, create the object and write it
    $run = new LinkCheckRun();
    $run->write();

    $pagesChecked = 0;
    foreach ($pages as $page) {
        // Skip this page if it shouldn't be checked
        if (isset(self::$exempt_classes[get_class($page)])) {
            continue;
        }

        $processor = new LinkCheckProcessor($page->AbsoluteLink());
        if ($isAjax) {
            $processor->showMessages = false;
        }
        $results = $processor->run();

        // Memory cleanup - we don't need the processor anymore
        unset($processor);

        if ($results) {
            foreach ($results as $result) {
                $code = $result['Code'];

                if ($code >= 200 && $code <= 299) {
                    $goodLinks++;
                } elseif ($code >= 300 && $code <= 399) {
                    $checkLinks++;
                } elseif ($code >= 400 && $code <= 599) {
                    $brokenLinks++;

                    // The result is "Bad" (broken), create a BrokenLink record.
                    // Link and Status are truncated to 255 and 30 characters.
                    $brokenLink = new BrokenLink();
                    $brokenLink->Link = substr($result['Link'], 0, 255);
                    $brokenLink->Code = $code;
                    $brokenLink->Status = substr($result['Status'], 0, 30);
                    $brokenLink->LinkCheckRunID = $run->ID;
                    $brokenLink->PageID = $page->ID;
                    $brokenLink->write();

                    // Memory cleanup
                    $brokenLink->destroy();
                }
            }
        }

        // Memory cleanup
        $page->destroy();

        $pagesChecked++;
    }

    // Memory cleanup
    unset($pages);

    // Mark as done - this is to indicate that the task has completed (for reporting in CMS)
    $run->FinishDate = date('Y-m-d H:i:s');
    $run->IsComplete = 1;
    $run->PagesChecked = $pagesChecked;
    $run->write();

    // AJAX callers only need the run identification, so return before
    // computing the values that are used solely for the printed report.
    if ($isAjax) {
        return array('Date' => $run->obj('Created')->Nice(), 'LinkCheckRunID' => $run->ID);
    }

    // Nothing is printed while a test is running
    if ($runningTest) {
        return;
    }

    // Find the URL to the LinkCheckAdmin section in the CMS
    $linkcheckAdminLink = Director::absoluteBaseURL() . singleton('LinkCheckAdmin')->Link();

    // Count the number of BrokenLink records created for this run (queried once)
    $runBrokenLinks = $run->BrokenLinks()->Count();
    if (!$runBrokenLinks) {
        $runBrokenLinks = 0;
    }

    if (Director::is_cli()) {
        echo "SilverStripe Link Checker results\n";
        echo "---------------------------------\n\n";
        echo "{$pagesChecked} pages were checked for broken links.\n";
        echo "{$goodLinks} links were OK.\n";
        echo "{$checkLinks} links were redirected.\n";
        echo "{$brokenLinks} links were broken, and {$runBrokenLinks} BrokenLink records were generated for them.\n\n";
        echo "LinkCheckRun ID #{$run->ID} was created with {$runBrokenLinks} BrokenLink related records.\n";
        echo "Please visit {$linkcheckAdminLink} to see which broken links were found.\n\n";
    } else {
        echo "<h1>SilverStripe Link Checker results</h1>";
        echo '<ul>';
        echo "<li>{$pagesChecked} pages were checked for broken links.</li>";
        echo "<li>{$goodLinks} links were OK.</li>";
        echo "<li>{$checkLinks} links were redirected.</li>";
        echo "<li>{$brokenLinks} links were broken, and {$runBrokenLinks} BrokenLink records were generated for them.</li>";
        echo '</ul>';
        echo "<p>LinkCheckRun ID #{$run->ID} was created with {$runBrokenLinks} BrokenLink related records.</p>";
        echo "<p>Please visit <a href=\"{$linkcheckAdminLink}\">{$linkcheckAdminLink}</a> to see which broken links were found.</p>";
    }
}