コード例 #1
0
 /**
  * Crawling initialization script
  *
  * Checks the lead url, records it, and then parses the returned HTML in
  * two passes: first the links and images matched by the content queries
  * (ASCMS_LINKMANAGER_CONTENT_HREF_QUERY / ASCMS_LINKMANAGER_CONTENT_IMG_QUERY),
  * then, after the page content and the navigation have been stripped from
  * the document, every remaining <img> and <a> element.
  *
  * The method returns early (after recording the lead url) when the lead
  * url gives no response or its body cannot be parsed.
  *
  * @global object $objInit
  *
  * @param string         $url         lead url
  * @param \HTTP_Request2 $request     http_request object
  * @param integer        $referPageId lead page id
  *
  * @return null
  */
 public function initializeScript($url, \HTTP_Request2 $request, $referPageId)
 {
     global $objInit;
     $_ARRAYLANG  = $objInit->loadLanguageData('LinkManager');
     $noLinkText  = $_ARRAYLANG['TXT_CORE_MODULE_LINKMANAGER_NO_LINK'];
     $noImageText = $_ARRAYLANG['TXT_CORE_MODULE_LINKMANAGER_NO_IMAGE'];

     $refererUrlResponse = $this->checkUrlStatus($url, $request);
     // The lead url itself is always recorded, even when it did not respond
     $this->storeUrlInfos($request, $url, $url, 0, $referPageId, $noLinkText);
     if (!$refererUrlResponse) {
         return;
     }

     $html = \str_get_html($refererUrlResponse->getBody());
     if (!$html) {
         return;
     }

     // First check the page content href and src.
     // NOTE(review): the meaning of the second argument of isLinkExists()
     // is not visible here; it is passed as true only in this first pass.
     foreach ($html->find(ASCMS_LINKMANAGER_CONTENT_HREF_QUERY) as $element) {
         $aHref = \Cx\Core_Modules\LinkManager\Controller\Url::checkPath($element->href, $url);
         if (!empty($aHref) && $this->isLinkExists($aHref, true)) {
             $linkText = $element->plaintext ? $element->plaintext : $noLinkText;
             $this->storeUrlInfos($request, $aHref, $url, 0, $referPageId, $linkText);
         }
     }
     foreach ($html->find(ASCMS_LINKMANAGER_CONTENT_IMG_QUERY) as $element) {
         if (preg_match('#\\.(jpg|jpeg|gif|png)$# i', $element->src)) {
             $imgSrc = \Cx\Core_Modules\LinkManager\Controller\Url::checkPath($element->src, null);
             if (!empty($imgSrc) && $this->isLinkExists($imgSrc, true)) {
                 $this->storeUrlInfos($request, $imgSrc, $url, 1, $referPageId, $noImageText);
             }
         }
     }

     // Remove the page content first and the navigation menu second, so the
     // second pass only sees what is left of the document.
     $queriesToStrip = array(
         ASCMS_LINKMANAGER_CONTENT_PAGE_QUERY,
         ASCMS_LINKMANAGER_NAVIGATION_QUERY,
     );
     foreach ($queriesToStrip as $query) {
         $objNode = $html->find($query, 0);
         // find() yields no element when the selector does not match; writing
         // to a property of that result would fail, so only blank real nodes
         if ($objNode) {
             $objNode->outertext = '';
         }
         // Re-parse so that the removal is reflected in the DOM
         $html = \str_get_html($html->outertext);
         if (!$html) {
             // Nothing parseable is left, so there is nothing more to crawl
             return;
         }
     }

     // Find all images
     foreach ($html->find('img') as $element) {
         if (preg_match('#\\.(jpg|jpeg|gif|png)$# i', $element->src)) {
             $imgSrc = \Cx\Core_Modules\LinkManager\Controller\Url::checkPath($element->src, null);
             if (!empty($imgSrc) && $this->isLinkExists($imgSrc)) {
                 $this->storeUrlInfos($request, $imgSrc, $url, 1, $referPageId, $noImageText);
             }
         }
     }
     // Find all links
     foreach ($html->find('a') as $element) {
         $aHref = \Cx\Core_Modules\LinkManager\Controller\Url::checkPath($element->href, $url);
         if (!empty($aHref) && $this->isLinkExists($aHref)) {
             $linkText = $element->plaintext ? $element->plaintext : $noLinkText;
             $this->storeUrlInfos($request, $aHref, $url, 0, $referPageId, $linkText);
         }
     }
 }
コード例 #2
0
 /**
  * Recheck the selected links status
  *
  * Reads the selected link ids from $_POST['selected'] and, for every
  * selected link, loads its referer page once (cached per entry title) and
  * collects the images and anchors found there after the navigation menu
  * has been stripped.
  *
  * - If the link's requested path is no longer on the referer page, the page
  *   is handed to recheckPage() and the link is removed.
  * - Otherwise the requested path is fetched again: a 200 response removes
  *   the link, any other status (0 on request failure) is stored on the
  *   link and the link is flagged as rechecked.
  * - Links whose referer page could not be loaded or parsed are left
  *   untouched.
  *
  * Finally the broken link count of the last crawler run is updated for
  * every active frontend language and a success message is set.
  *
  * @global array $_ARRAYLANG
  *
  * @return null
  */
 public function recheckSelectedLinks()
 {
     global $_ARRAYLANG;
     $selectedIds = isset($_POST['selected']) ? $_POST['selected'] : '';
     $links = $this->linkRepository->getSelectedLinks($selectedIds);
     if (!$links) {
         $links = array();
     }
     $request = new \HTTP_Request2();
     // Links found on each referer page, keyed by entry title. A null value
     // marks a page that could not be loaded or parsed. An array is used
     // instead of variable variables so that an entry title such as
     // "request" or "links" cannot overwrite a local variable.
     $pageLinks = array();
     foreach ($links as $link) {
         $entryTitle = (string) $link->getEntryTitle();
         if (!array_key_exists($entryTitle, $pageLinks)) {
             $pageLinks[$entryTitle] = null;
             try {
                 $request->setUrl($link->getRefererPath());
                 $request->setConfig(array('ssl_verify_peer' => false, 'ssl_verify_host' => false, 'follow_redirects' => true));
                 $response = $request->send();
                 $html = \str_get_html($response->getBody());
             } catch (\Exception $e) {
                 $html = false;
             }
             if ($html) {
                 //remove the navigation menu
                 $objNavigation = $html->find('ul#navigation, ul.navigation', 0);
                 // Pages without a matching navigation list yield no element
                 if ($objNavigation) {
                     $objNavigation->outertext = '';
                 }
                 // Re-parse so that the removal is reflected in the DOM
                 $html = \str_get_html($html->outertext);
             }
             if ($html) {
                 $foundLinks = array();
                 // Find all images
                 foreach ($html->find('img') as $element) {
                     if (preg_match('#\\.(jpg|jpeg|gif|png)$# i', $element->src)) {
                         $imgSrc = \Cx\Core_Modules\LinkManager\Controller\Url::checkPath($element->src, null);
                         if (!empty($imgSrc)) {
                             $foundLinks[$imgSrc] = $_ARRAYLANG['TXT_CORE_MODULE_LINKMANAGER_NO_IMAGE'];
                         }
                     }
                 }
                 // Find all links
                 foreach ($html->find('a') as $element) {
                     $aHref = \Cx\Core_Modules\LinkManager\Controller\Url::checkPath($element->href, $link->getRefererPath());
                     if (!empty($aHref)) {
                         $linkText = $element->plaintext ? $element->plaintext : $_ARRAYLANG['TXT_CORE_MODULE_LINKMANAGER_NO_LINK'];
                         $foundLinks[$aHref] = $linkText;
                     }
                 }
                 $pageLinks[$entryTitle] = $foundLinks;
             }
         }
         if ($pageLinks[$entryTitle] === null) {
             // The referer page is unavailable, so nothing can be said about
             // this link. Skipping every link of that page (not only the
             // first one) avoids deleting entries because of a failed fetch.
             continue;
         }
         if (!array_key_exists($link->getRequestedPath(), $pageLinks[$entryTitle])) {
             // The link is no longer present on its referer page
             $linkInputValues = array('lang' => $link->getLang(), 'refererPath' => $link->getRefererPath(), 'leadPath' => $link->getLeadPath(), 'entryTitle' => $link->getEntryTitle(), 'detectedTime' => $link->getDetectedTime(), 'updatedBy' => 0);
             $this->recheckPage($pageLinks[$entryTitle], $linkInputValues, $request);
             $this->em->remove($link);
         } else {
             try {
                 $request->setUrl($link->getRequestedPath());
                 $response = $request->send();
                 $urlStatus = $response->getStatus();
             } catch (\Exception $e) {
                 // 0 is stored as the status when the request itself failed
                 $urlStatus = 0;
             }
             if ((int) $urlStatus === 200) {
                 $this->em->remove($link);
             } else {
                 $link->setLinkStatusCode($urlStatus);
                 $link->setLinkRecheck(true);
                 // Only links that are kept are persisted. Assuming $this->em
                 // is a Doctrine EntityManager, persist() on an entity that
                 // is scheduled for removal makes it managed again and would
                 // cancel the removal above.
                 $this->em->persist($link);
             }
         }
         $this->em->flush();
     }
     //update the broken links count in crawler table
     foreach (\FWLanguage::getActiveFrontendLanguages() as $lang) {
         $lastRunByLang = $this->crawlerRepository->getLastRunByLang($lang['id']);
         $brokenLinkCnt = $this->linkRepository->brokenLinkCountByLang($lang['id']);
         if ($lastRunByLang) {
             $lastRunByLang->setTotalBrokenLinks($brokenLinkCnt);
             $this->em->persist($lastRunByLang);
         }
     }
     $this->em->flush();
     \Message::ok($_ARRAYLANG['TXT_CORE_MODULE_LINKMANAGER_SUCCESS_MSG']);
 }