/**
 * Show the details of the latest crawler run and the paged list of all runs
 *
 * Fills the placeholders of the "last run" section, or hides the block
 * `showLastRun` (if the template has it) when no run is available. After
 * that one table row is parsed for each crawler run of the requested page.
 *
 * The paging offset is read from the request parameter `pos`.
 *
 * @global array $_ARRAYLANG
 */
public function showCrawlerRuns()
{
    global $_ARRAYLANG;

    //show the last runs details
    $lastRunResult = $this->crawlerRepository->getLatestRunDetails();
    if ($lastRunResult) {
        $lastRun      = $lastRunResult[0];
        $lastRunStart = $lastRun->getStartTime();
        $lastRunEnd   = $lastRun->getEndTime();
        $this->template->setVariable(array(
            $this->moduleNameLang . '_LAST_RUN_STARTTIME'    => \Cx\Core_Modules\LinkManager\Controller\DateTime::formattedDateAndTime($lastRunStart),
            $this->moduleNameLang . '_LAST_RUN_ENDTIME'      => \Cx\Core_Modules\LinkManager\Controller\DateTime::formattedDateAndTime($lastRunEnd),
            $this->moduleNameLang . '_LAST_RUN_DURATION'     => \Cx\Core_Modules\LinkManager\Controller\DateTime::diffTime($lastRunStart, $lastRunEnd),
            $this->moduleNameLang . '_LAST_RUN_TOTAL_LINKS'  => $lastRun->getTotalLinks(),
            $this->moduleNameLang . '_LAST_RUN_BROKEN_LINKS' => $lastRun->getTotalBrokenLinks(),
        ));
    } elseif ($this->template->blockExists('showLastRun')) {
        $this->template->hideBlock('showLastRun');
    }

    //show Crawler Runs table
    //get parameters
    // 'pos' comes straight from the request: force it to a non-negative
    // integer before it is handed to the paging and the repository
    $pos       = isset($_GET['pos']) ? max(0, intval($_GET['pos'])) : 0;
    $langArray = \FWLanguage::getLanguageArray();

    //set the settings value from DB
    \Cx\Core\Setting\Controller\Setting::init('LinkManager', 'config');
    $pageLimit = \Cx\Core\Setting\Controller\Setting::getValue('entriesPerPage', 'LinkManager');
    $parameter = './index.php?cmd=' . $this->moduleName;
    $this->template->setVariable(
        'ENTRIES_PAGING',
        \Paging::get(
            $parameter,
            $_ARRAYLANG['TXT_CORE_MODULE_LINKMANAGER_LINKS'],
            $this->crawlerRepository->crawlerEntryCount(),
            $pageLimit,
            true,
            $pos,
            'pos'
        )
    );

    $crawlers = $this->crawlerRepository->getCrawlerRunEntries($pos, $pageLimit);
    if (!$crawlers || $crawlers->count() <= 0) {
        $this->template->touchBlock($this->moduleName . 'NoCrawlerRunsFound');
        return;
    }

    $i = 1;
    foreach ($crawlers as $crawler) {
        // the language of a run may be missing from the language array;
        // fall back to an empty name instead of raising an undefined index notice
        $langId   = $crawler->getLang();
        $langName = isset($langArray[$langId]['name']) ? $langArray[$langId]['name'] : '';

        $startTime = $crawler->getStartTime();
        $endTime   = $crawler->getEndTime();
        $this->template->setVariable(array(
            $this->moduleNameLang . '_CRAWLER_RUN_ID'           => $crawler->getId(),
            $this->moduleNameLang . '_CRAWLER_RUN_LANGUAGE'     => $langName,
            $this->moduleNameLang . '_CRAWLER_RUN_STARTTIME'    => \Cx\Core_Modules\LinkManager\Controller\DateTime::formattedDateAndTime($startTime),
            $this->moduleNameLang . '_CRAWLER_RUN_ENDTIME'      => \Cx\Core_Modules\LinkManager\Controller\DateTime::formattedDateAndTime($endTime),
            $this->moduleNameLang . '_CRAWLER_RUN_DURATION'     => \Cx\Core_Modules\LinkManager\Controller\DateTime::diffTime($startTime, $endTime),
            $this->moduleNameLang . '_CRAWLER_RUN_TOTAL_LINKS'  => $crawler->getTotalLinks(),
            $this->moduleNameLang . '_CRAWLER_RUN_BROKEN_LINKS' => $crawler->getTotalBrokenLinks(),
            $this->moduleNameLang . '_CRAWLER_RUN_STATUS'       => ucfirst($crawler->getRunStatus()),
            // alternates between "row1" and "row2", starting with "row1"
            $this->moduleNameLang . '_CRAWLER_RUN_ROW'          => 'row' . (++$i % 2 + 1),
        ));
        $this->template->parse($this->moduleName . 'CrawlerRuns');
    }
    $this->template->hideBlock($this->moduleName . 'NoCrawlerRunsFound');
}
/**
 * Resets crawler runs that are still flagged as "running".
 *
 * Intended to be called before the crawler is triggered: runs which are
 * not running anymore but still carry the status "running" get the status
 * "incomplete". Nothing is written if no such run is found.
 */
private function changeRunningCrawlingToIncomplete()
{
    $staleRuns = $this->crawlerRepo->findBy(array('runStatus' => self::RUN_STATUS_RUNNING));
    if (!$staleRuns || count($staleRuns) === 0) {
        // nothing to clean up
        return;
    }

    foreach ($staleRuns as $staleRun) {
        $staleRun->setRunStatus(self::RUN_STATUS_INCOMPLETE);
        $this->em->persist($staleRun);
    }

    // write all status changes in one go
    $this->em->flush();
}
/**
 * Resets crawler runs that are still flagged as "running".
 *
 * Intended to be called before the crawler is triggered: runs which are
 * not running anymore but still carry the status "running" get the status
 * "incomplete". The entity manager is only flushed if at least one such
 * run was found.
 */
protected function changeRunningCrawlingToIncomplete()
{
    $runningEntries = $this->crawlerRepo->findBy(array('runStatus' => self::RUN_STATUS_RUNNING));

    if ($runningEntries && count($runningEntries) > 0) {
        foreach ($runningEntries as $entry) {
            $entry->setRunStatus(self::RUN_STATUS_INCOMPLETE);
        }
        // write all status changes in one go
        $this->em->flush();
    }
}
/**
 * Recheck the selected links status
 *
 * For every selected link (ids taken from the POST parameter `selected`):
 *  - the links of its referer page are loaded through the LinkCrawler
 *    controller, once per referer path, and the page is rechecked on that
 *    first request
 *  - if the requested path is no longer among the links of the referer
 *    page, the link is removed
 *  - otherwise the status code of the requested path is fetched again and
 *    stored together with the flag status (1 for status 200, else 0)
 *
 * Afterwards the broken link count of the last crawler run of each active
 * frontend language is updated and a success message is set.
 *
 * @global array $_ARRAYLANG
 *
 * @return null
 */
public function recheckSelectedLinks()
{
    global $_ARRAYLANG;

    //Get the post values
    $selectedIds = isset($_POST['selected']) ? $_POST['selected'] : '';
    $links       = $this->linkRepository->getSelectedLinks($selectedIds);
    if (!$links) {
        $links = array();
    }

    // links found on each referer page, indexed by the referer path
    $pageLinks = array();
    foreach ($links as $link) {
        $refererPath = $link->getRefererPath();
        $requestPath = $link->getRequestedPath();

        // Get the Links in the referer
        // Recheck the refer once (on first request of refer)
        if (array_key_exists($refererPath, $pageLinks)) {
            $subLinks = $pageLinks[$refererPath];
        } else {
            $subLinks = $this->getController('LinkCrawler')->getPageLinks($refererPath);
            $pageLinks[$refererPath] = $subLinks;
            $this->recheckPage($link, $subLinks);
        }

        // Check whether the request path exists in the referer page
        // if not exists remove the link
        if (!array_key_exists($requestPath, $subLinks)) {
            $this->em->remove($link);
            continue;
        }

        $urlStatus = $this->getUrlStatus($requestPath);
        $link->setLinkStatusCode($urlStatus);
        // loose comparison on purpose: the type returned by getUrlStatus()
        // is not visible here
        $link->setFlagStatus($urlStatus == 200 ? 1 : 0);
        $link->setLinkRecheck(true);
    }

    // Write the removals and status changes before counting, otherwise the
    // count below would not include them.
    // NOTE(review): assumes brokenLinkCountByLang() reads the persisted
    // state - confirm against the repository
    $this->em->flush();

    //update the broken links count in crawler table
    foreach (\FWLanguage::getActiveFrontendLanguages() as $lang) {
        $lastRunByLang = $this->crawlerRepository->getLastRunByLang($lang['id']);
        if (!$lastRunByLang) {
            continue;
        }
        $lastRunByLang->setTotalBrokenLinks(
            $this->linkRepository->brokenLinkCountByLang($lang['id'])
        );
    }
    $this->em->flush();

    \Message::ok($_ARRAYLANG['TXT_CORE_MODULE_LINKMANAGER_SUCCESS_MSG']);
}
/**
 * Recheck the selected links status
 *
 * For every selected link (ids taken from the POST parameter `selected`):
 *  - the referer page is requested once per referer path and all image
 *    sources and anchor targets found on it (outside of the navigation
 *    list) are collected
 *  - if the referer page cannot be loaded, the link is left untouched
 *  - if the requested path is no longer present on the referer page, the
 *    page is rechecked and the link is removed
 *  - otherwise the requested path is requested again: on status 200 the
 *    link is removed, else the new status code is stored and the link is
 *    marked as rechecked
 *
 * Afterwards the broken link count of the last crawler run of each active
 * frontend language is updated and a success message is set.
 *
 * @global array $_ARRAYLANG
 *
 * @return null
 */
public function recheckSelectedLinks()
{
    global $_ARRAYLANG;

    $selectedIds = isset($_POST['selected']) ? $_POST['selected'] : '';
    $links       = $this->linkRepository->getSelectedLinks($selectedIds);
    if (!$links) {
        $links = array();
    }

    $request = new \HTTP_Request2();

    // Links found on each referer page, indexed by the referer path.
    // null marks a page which could not be loaded.
    // An explicit array is used instead of variables named after the entry
    // title: such variables could overwrite locals like $request or $html
    // and mixed up pages sharing the same title.
    $pageLinks = array();

    foreach ($links as $link) {
        $refererPath = $link->getRefererPath();

        if (!array_key_exists($refererPath, $pageLinks)) {
            $pageLinks[$refererPath] = null;

            try {
                $request->setUrl($refererPath);
                $request->setConfig(array('ssl_verify_peer' => false, 'ssl_verify_host' => false, 'follow_redirects' => true));
                $response = $request->send();
                $html     = \str_get_html($response->getBody());
            } catch (\Exception $e) {
                $html = false;
            }

            if ($html) {
                //remove the navigation menu
                // find() yields nothing if the page has no navigation list
                $objNavigation = $html->find('ul#navigation, ul.navigation', 0);
                if ($objNavigation) {
                    $objNavigation->outertext = '';
                    $html = \str_get_html($html->outertext);
                }
            }

            if ($html) {
                $foundLinks = array();

                // Find all images
                foreach ($html->find('img') as $element) {
                    if (!preg_match('#\\.(jpg|jpeg|gif|png)$# i', $element->src)) {
                        continue;
                    }
                    $imgSrc = \Cx\Core_Modules\LinkManager\Controller\Url::checkPath($element->src, null);
                    if (!empty($imgSrc)) {
                        $foundLinks[$imgSrc] = $_ARRAYLANG['TXT_CORE_MODULE_LINKMANAGER_NO_IMAGE'];
                    }
                }

                // Find all links
                foreach ($html->find('a') as $element) {
                    $aHref = \Cx\Core_Modules\LinkManager\Controller\Url::checkPath($element->href, $refererPath);
                    if (!empty($aHref)) {
                        $foundLinks[$aHref] = $element->plaintext
                            ? $element->plaintext
                            : $_ARRAYLANG['TXT_CORE_MODULE_LINKMANAGER_NO_LINK'];
                    }
                }

                $pageLinks[$refererPath] = $foundLinks;
            }
        }

        // The referer page could not be loaded: there is nothing to compare
        // against, so leave every link of that page as it is instead of
        // treating it as vanished.
        if ($pageLinks[$refererPath] === null) {
            continue;
        }

        $requestedPath = $link->getRequestedPath();
        if (!array_key_exists($requestedPath, $pageLinks[$refererPath])) {
            $linkInputValues = array(
                'lang'         => $link->getLang(),
                'refererPath'  => $refererPath,
                'leadPath'     => $link->getLeadPath(),
                'entryTitle'   => $link->getEntryTitle(),
                'detectedTime' => $link->getDetectedTime(),
                'updatedBy'    => 0,
            );
            $this->recheckPage($pageLinks[$refererPath], $linkInputValues, $request);
            $this->em->remove($link);
        } else {
            try {
                $request->setUrl($requestedPath);
                $response  = $request->send();
                $urlStatus = $response->getStatus();
            } catch (\Exception $e) {
                $urlStatus = 0;
            }

            if ($urlStatus == '200') {
                // the link works again
                $this->em->remove($link);
            } else {
                $link->setLinkStatusCode($urlStatus);
                $link->setLinkRecheck(true);
                $this->em->persist($link);
            }
        }

        // Do not persist() a link after remove(): with a Doctrine entity
        // manager this makes the entity managed again and the removal is
        // never executed.
        $this->em->flush();
    }

    //update the broken links count in crawler table
    foreach (\FWLanguage::getActiveFrontendLanguages() as $lang) {
        $lastRunByLang = $this->crawlerRepository->getLastRunByLang($lang['id']);
        $brokenLinkCnt = $this->linkRepository->brokenLinkCountByLang($lang['id']);
        if ($lastRunByLang) {
            $lastRunByLang->setTotalBrokenLinks($brokenLinkCnt);
            $this->em->persist($lastRunByLang);
        }
    }
    $this->em->flush();

    \Message::ok($_ARRAYLANG['TXT_CORE_MODULE_LINKMANAGER_SUCCESS_MSG']);
}