Пример #1
0
 /**
  * Checks Utils::validateURL() against malformed and well-formed URLs.
  */
 public function testValidateURL()
 {
     // Inputs that must be rejected: a bare word and a URL with an empty host.
     $rejected = array('yaya', 'http:///thediviningwand.com');
     foreach ($rejected as $candidate) {
         $this->assertFalse(Utils::validateURL($candidate));
     }

     // Inputs that must be accepted: plain http and https URLs.
     $accepted = array('http://asdf.com', 'https://asdf.com');
     foreach ($accepted as $candidate) {
         $this->assertTrue(Utils::validateURL($candidate));
     }
 }
Пример #2
0
 /**
  * Crawler hook: expand a batch of not-yet-expanded links.
  *
  * Requests up to 1500 links from the LinkDAO, resolves each valid one with
  * untinyurl() and stores every non-empty result. Invalid URLs are only
  * logged. The logger is closed once the batch is done.
  */
 public function crawl()
 {
     $log = Logger::getInstance();
     $link_dao = DAOFactory::getDAO('LinkDAO');
     // Second argument passed to every logStatus() call in this method.
     $log_source = "Expand URLs Plugin";
     //@TODO Set limit on total number of links to expand per crawler run in the plugin settings, for now 1500
     $pending_links = $link_dao->getLinksToExpand(1500);
     $log->logStatus(count($pending_links) . " links to expand", $log_source);
     foreach ($pending_links as $url) {
         if (!Utils::validateURL($url)) {
             $log->logStatus($url . " is not a valid URL; skipping expansion", $log_source);
             continue;
         }
         $resolved_url = self::untinyurl($url, $link_dao);
         // An empty result means there is nothing to store for this link.
         if ($resolved_url != '') {
             $link_dao->saveExpandedUrl($url, $resolved_url);
         }
     }
     $log->logStatus("URL expansion complete for this run", $log_source);
     // Close logging
     $log->close();
 }
Пример #3
0
    /**
     * Crawler hook: expand a batch of not-yet-expanded links.
     *
     * The batch size is read from the 'links_to_expand' option of the 'expandurls' plugin and
     * falls back to 1500 when that option is not set. Links that fail validation or whose
     * expansion comes back empty are counted as errors.
     */
    public function crawl() {
        $log = Logger::getInstance();
        $log->setUsername(null);
        $link_dao = DAOFactory::getDAO('LinkDAO');

        $option_dao = DAOFactory::getDAO('PluginOptionDAO');
        $plugin_options = $option_dao->getOptionsHash('expandurls', true);

        // Default batch size, overridden by the plugin option when present.
        $batch_size = 1500;
        if (isset($plugin_options['links_to_expand']->option_value)) {
            $batch_size = (int)$plugin_options['links_to_expand']->option_value;
        }
        $pending_links = $link_dao->getLinksToExpand($batch_size);
        $pending_count = count($pending_links);

        $log->logUserInfo($pending_count." links to expand. Please wait. Working...",
        __METHOD__.','.__LINE__);

        $expanded_count = 0;
        $error_count = 0;
        foreach ($pending_links as $url) {
            if (!Utils::validateURL($url)) {
                $error_count++;
                $log->logError($url." is not a valid URL; skipping expansion", __METHOD__.','.__LINE__);
                continue;
            }

            $log->logInfo("Expanding ".($expanded_count + 1)." of ".$pending_count." (".$url.")",
            __METHOD__.','.__LINE__);

            $resolved_url = self::untinyurl($url, $link_dao);
            if ($resolved_url != '') {
                $link_dao->saveExpandedUrl($url, $resolved_url);
                $expanded_count++;
            } else {
                // Nothing came back from the expansion; count it as a failure.
                $error_count++;
            }
        }
        $log->logUserSuccess($expanded_count." URLs successfully expanded (".$error_count." errors).",
        __METHOD__.','.__LINE__);
    }
 /**
  * Expand links that have not been expanded yet and report the outcome.
  *
  * Links that fail validation or whose expansion comes back empty are counted
  * as errors; every non-empty expansion is saved through the LinkDAO.
  *
  * @param $total_links_to_expand Number of links to request from the LinkDAO for this run
  */
 public function expandRemainingURLs($total_links_to_expand)
 {
     $log = Logger::getInstance();
     $dao = DAOFactory::getDAO('LinkDAO');
     $pending_links = $dao->getLinksToExpand($total_links_to_expand);
     $pending_count = count($pending_links);
     $log->logUserInfo($pending_count . " links to expand. Please wait. Working...", __METHOD__ . ',' . __LINE__);
     $expanded_count = 0;
     $error_count = 0;
     foreach ($pending_links as $url) {
         if (!Utils::validateURL($url)) {
             $error_count++;
             $log->logError($url . " is not a valid URL; skipping expansion", __METHOD__ . ',' . __LINE__);
             continue;
         }
         $log->logInfo("Expanding " . ($expanded_count + 1) . " of " . $pending_count . " (" . $url . ")", __METHOD__ . ',' . __LINE__);
         $resolved_url = self::untinyurl($url, $dao);
         if ($resolved_url != '') {
             $dao->saveExpandedUrl($url, $resolved_url);
             $expanded_count++;
         } else {
             // Nothing came back from the expansion; count it as a failure.
             $error_count++;
         }
     }
     $log->logUserSuccess($expanded_count . " URLs successfully expanded (" . $error_count . " errors).", __METHOD__ . ',' . __LINE__);
 }
Пример #5
0
 /**
  * Save expanded version of all unexpanded URLs to data store, as well as intermediary short links.
  *
  * Fetches up to $this->link_limit links from the link DAO and, for each valid one, calls
  * URLExpander::expandURL() repeatedly until the result stops changing, comes back empty, the
  * iteration counter exceeds self::EXPANSION_CAP, a short link cannot be stored, or the
  * expanded URL is 256 bytes or longer. Each intermediate short link is stored through the
  * short link DAO. Invalid or unexpandable URLs are recorded with saveExpansionError().
  *
  * @param $flickr_api_key Optional; when set, links starting with http://flic.kr/ are passed to
  *                        expandFlickrThumbnail() and are not saved through saveExpandedUrl()
  */
 public function expandOriginalURLs($flickr_api_key = null)
 {
     $links_to_expand = $this->link_dao->getLinksToExpand($this->link_limit);
     $this->logger->logUserInfo(count($links_to_expand) . " links to expand. Please wait. Working...", __METHOD__ . ',' . __LINE__);
     $total_expanded = 0;
     $total_errors = 0;
     // Per-link flag; reset at the bottom of every loop iteration.
     $has_expanded_flickr_link = false;
     foreach ($links_to_expand as $index => $link) {
         if (Utils::validateURL($link->url)) {
             // Counts expansion rounds for this link so the while loop below cannot run forever.
             $endless_loop_prevention_counter = 0;
             $this->logger->logInfo("Expanding " . ($total_expanded + 1) . " of " . count($links_to_expand) . " (" . $link->url . ")", __METHOD__ . ',' . __LINE__);
             // Make sure shortened short links--like t.co--get fully expanded
             $fully_expanded = false;
             $short_link = $link->url;
             while (!$fully_expanded) {
                 // Begin Flickr thumbnail processing
                 if (isset($flickr_api_key) && substr($short_link, 0, strlen('http://flic.kr/')) == 'http://flic.kr/') {
                     self::expandFlickrThumbnail($flickr_api_key, $short_link, $link->url);
                     $has_expanded_flickr_link = true;
                     $fully_expanded = true;
                 }
                 // End Flickr thumbnail processing
                 // NOTE(review): expandURL() still runs once for a Flickr link even though
                 // $fully_expanded is already true at this point -- confirm this is intended.
                 $expanded_url = URLExpander::expandURL($short_link, $link->url, $index, count($links_to_expand), $this->link_dao, $this->logger);
                 // Stop when the URL no longer changes, nothing came back, or the cap was exceeded.
                 if ($expanded_url == $short_link || $expanded_url == '' || $endless_loop_prevention_counter > self::EXPANSION_CAP) {
                     $fully_expanded = true;
                 } else {
                     // The URL changed, so $short_link was an intermediate hop: record it.
                     try {
                         $this->short_link_dao->insert($link->id, $short_link);
                     } catch (DataExceedsColumnWidthException $e) {
                         $this->logger->logError($short_link . " short link record exceeds column width, cannot save", __METHOD__ . ',' . __LINE__);
                         $fully_expanded = true;
                     }
                 }
                 // Only keep following URLs shorter than 256 bytes; longer ones end the loop.
                 // Presumably this mirrors a storage column width -- TODO confirm.
                 if (strlen($expanded_url) < 256) {
                     $short_link = $expanded_url;
                 } else {
                     $fully_expanded = true;
                 }
                 $endless_loop_prevention_counter++;
             }
             // Flickr links were handled by expandFlickrThumbnail() above; they touch neither
             // $total_expanded nor $total_errors.
             if (!$has_expanded_flickr_link) {
                 if ($expanded_url != '') {
                     $image_src = URLProcessor::getImageSource($expanded_url);
                     $url_details = URLExpander::getWebPageDetails($expanded_url);
                     try {
                         $this->link_dao->saveExpandedUrl($link->url, $expanded_url, $url_details['title'], $image_src, $url_details['description']);
                         $total_expanded = $total_expanded + 1;
                     } catch (DataExceedsColumnWidthException $e) {
                         $this->logger->logError($link->url . " record exceeds column width, cannot save", __METHOD__ . ',' . __LINE__);
                         $this->link_dao->saveExpansionError($link->url, "URL exceeds column width");
                         $total_errors = $total_errors + 1;
                     }
                 } else {
                     // Expansion ended on an empty result.
                     $this->logger->logError($link->url . " not a valid URL - relocates to nowhere", __METHOD__ . ',' . __LINE__);
                     $this->link_dao->saveExpansionError($link->url, "Invalid URL - relocates to nowhere");
                     $total_errors = $total_errors + 1;
                 }
             }
         } else {
             // The original URL failed validation; nothing is expanded for it.
             $this->logger->logError($link->url . " not a valid URL", __METHOD__ . ',' . __LINE__);
             $this->link_dao->saveExpansionError($link->url, "Invalid URL");
             $total_errors = $total_errors + 1;
         }
         // Reset the Flickr flag before moving on to the next link.
         $has_expanded_flickr_link = false;
     }
     $this->logger->logUserSuccess($total_expanded . " URLs successfully expanded (" . $total_errors . " errors).", __METHOD__ . ',' . __LINE__);
 }
Пример #6
0
 /**
  * Save expanded version of all unexpanded URLs to data store.
  *
  * Fetches up to $this->link_limit links from the LinkDAO. Each valid link is expanded
  * repeatedly with untinyurl() until the result stops changing, comes back empty, or
  * repeats a URL already visited for this link. The last condition breaks redirect
  * cycles (A -> B -> A) that would otherwise keep the loop running forever.
  *
  * Non-empty results are saved together with their image source; invalid URLs are
  * recorded with saveExpansionError(). Both invalid URLs and empty expansions count
  * as errors in the final summary.
  */
 public function expandRemainingURLs()
 {
     $logger = Logger::getInstance();
     $link_dao = DAOFactory::getDAO('LinkDAO');
     $links_to_expand = $link_dao->getLinksToExpand($this->link_limit);
     // The list is not modified below, so its size is computed once.
     $total_links = count($links_to_expand);
     $logger->logUserInfo($total_links . " links to expand. Please wait. Working...", __METHOD__ . ',' . __LINE__);
     $total_expanded = 0;
     $total_errors = 0;
     foreach ($links_to_expand as $index => $link) {
         if (Utils::validateURL($link)) {
             $logger->logInfo("Expanding " . ($total_expanded + 1) . " of " . $total_links . " (" . $link . ")", __METHOD__ . ',' . __LINE__);
             //make sure shortened short links--like t.co--get fully expanded
             $fully_expanded = false;
             $short_link = $link;
             // URLs already visited while expanding this link, keyed by URL for O(1) lookup.
             $seen_urls = array($short_link => true);
             while (!$fully_expanded) {
                 $expanded_url = self::untinyurl($short_link, $link_dao, $link, $index, $total_links);
                 if ($expanded_url == $short_link || $expanded_url == '' || isset($seen_urls[$expanded_url])) {
                     // Unchanged, empty, or a URL we have been to before: stop here.
                     $fully_expanded = true;
                 } else {
                     $seen_urls[$expanded_url] = true;
                 }
                 $short_link = $expanded_url;
             }
             if ($expanded_url != '') {
                 $image_src = URLProcessor::getImageSource($expanded_url);
                 $link_dao->saveExpandedUrl($link, $expanded_url, '', $image_src);
                 $total_expanded = $total_expanded + 1;
             } else {
                 // Expansion ended on an empty result.
                 $total_errors = $total_errors + 1;
             }
         } else {
             $total_errors = $total_errors + 1;
             $logger->logError($link . " not a valid URL", __METHOD__ . ',' . __LINE__);
             $link_dao->saveExpansionError($link, "Invalid URL");
         }
     }
     $logger->logUserSuccess($total_expanded . " URLs successfully expanded (" . $total_errors . " errors).", __METHOD__ . ',' . __LINE__);
 }
 /**
  * Save expanded version of all unexpanded URLs to data store, as well as intermediary short links.
  *
  * Fetches up to $this->link_limit links from the link DAO. Each valid link is expanded
  * repeatedly with URLExpander::expandURL() until the result stops changing, comes back
  * empty, or repeats a URL already visited for this link. The last condition breaks
  * redirect cycles (A -> B -> A) that would otherwise loop forever and keep inserting
  * short link records. Every intermediate short link is stored through the short link DAO.
  *
  * @param $flickr_api_key Optional; when set, links starting with http://flic.kr/ are passed to
  *                        expandFlickrThumbnail() and are not saved through saveExpandedUrl()
  */
 public function expandOriginalURLs($flickr_api_key = null)
 {
     $links_to_expand = $this->link_dao->getLinksToExpand($this->link_limit);
     // The list is not modified below, so its size is computed once.
     $total_links = count($links_to_expand);
     $this->logger->logUserInfo($total_links . " links to expand. Please wait. Working...", __METHOD__ . ',' . __LINE__);
     $total_expanded = 0;
     $total_errors = 0;
     // Per-link flag; reset at the bottom of every loop iteration.
     $has_expanded_flickr_link = false;
     foreach ($links_to_expand as $index => $link) {
         if (Utils::validateURL($link->url)) {
             $this->logger->logInfo("Expanding " . ($total_expanded + 1) . " of " . $total_links . " (" . $link->url . ")", __METHOD__ . ',' . __LINE__);
             //make sure shortened short links--like t.co--get fully expanded
             $fully_expanded = false;
             $short_link = $link->url;
             // URLs already visited while expanding this link, keyed by URL for O(1) lookup.
             $seen_urls = array($short_link => true);
             while (!$fully_expanded) {
                 //begin Flickr thumbnail processing
                 if (isset($flickr_api_key) && substr($short_link, 0, strlen('http://flic.kr/')) == 'http://flic.kr/') {
                     self::expandFlickrThumbnail($flickr_api_key, $short_link, $link->url);
                     $has_expanded_flickr_link = true;
                     $fully_expanded = true;
                 }
                 //end Flickr thumbnail processing
                 // NOTE(review): expandURL() still runs once for a Flickr link even though
                 // $fully_expanded is already true at this point -- confirm this is intended.
                 $expanded_url = URLExpander::expandURL($short_link, $link->url, $index, $total_links, $this->link_dao, $this->logger);
                 if ($expanded_url == $short_link || $expanded_url == '' || isset($seen_urls[$expanded_url])) {
                     // Unchanged, empty, or a URL we have been to before: stop here.
                     $fully_expanded = true;
                 } else {
                     // The URL changed, so $short_link was an intermediate hop: record it.
                     $seen_urls[$expanded_url] = true;
                     $this->short_link_dao->insert($link->id, $short_link);
                 }
                 $short_link = $expanded_url;
             }
             // Flickr links were handled by expandFlickrThumbnail() above; they touch neither
             // $total_expanded nor $total_errors.
             if (!$has_expanded_flickr_link) {
                 if ($expanded_url != '') {
                     $image_src = URLProcessor::getImageSource($expanded_url);
                     $this->link_dao->saveExpandedUrl($link->url, $expanded_url, '', $image_src);
                     $total_expanded = $total_expanded + 1;
                 } else {
                     // Expansion ended on an empty result.
                     $this->logger->logError($link->url . " not a valid URL - relocates to nowhere", __METHOD__ . ',' . __LINE__);
                     $this->link_dao->saveExpansionError($link->url, "Invalid URL - relocates to nowhere");
                     $total_errors = $total_errors + 1;
                 }
             }
         } else {
             $total_errors = $total_errors + 1;
             $this->logger->logError($link->url . " not a valid URL", __METHOD__ . ',' . __LINE__);
             $this->link_dao->saveExpansionError($link->url, "Invalid URL");
         }
         // Reset the Flickr flag before moving on to the next link.
         $has_expanded_flickr_link = false;
     }
     $this->logger->logUserSuccess($total_expanded . " URLs successfully expanded (" . $total_errors . " errors).", __METHOD__ . ',' . __LINE__);
 }