/**
 * Checks Utils::validateURL() against two strings it must reject (a bare word and a
 * URL with an empty host) and two it must accept (plain http and https URLs).
 */
public function testValidateURL() {
    $rejected_inputs = array('yaya', 'http:///thediviningwand.com');
    foreach ($rejected_inputs as $candidate) {
        $this->assertFalse(Utils::validateURL($candidate));
    }

    $accepted_inputs = array('http://asdf.com', 'https://asdf.com');
    foreach ($accepted_inputs as $candidate) {
        $this->assertTrue(Utils::validateURL($candidate));
    }
}
/**
 * Run when the crawler does.
 *
 * Asks the LinkDAO for up to 1500 links to expand, runs each valid one through
 * untinyurl(), saves every non-empty result, and logs progress along the way.
 * The logger is closed before returning.
 */
public function crawl() {
    $log_source = "Expand URLs Plugin";
    $logger = Logger::getInstance();
    $link_dao = DAOFactory::getDAO('LinkDAO');

    //@TODO Set limit on total number of links to expand per crawler run in the plugin settings, for now 1500
    $pending_links = $link_dao->getLinksToExpand(1500);
    $logger->logStatus(count($pending_links) . " links to expand", $log_source);

    foreach ($pending_links as $short_url) {
        if (!Utils::validateURL($short_url)) {
            $logger->logStatus($short_url . " is not a valid URL; skipping expansion", $log_source);
            continue;
        }

        $long_url = self::untinyurl($short_url, $link_dao);
        if ($long_url == '') {
            // Nothing came back from the expansion; leave the link untouched.
            continue;
        }
        $link_dao->saveExpandedUrl($short_url, $long_url);
    }

    $logger->logStatus("URL expansion complete for this run", $log_source);
    // Close logging
    $logger->close();
}
/**
 * Run when the crawler does.
 *
 * Reads the 'links_to_expand' option of the 'expandurls' plugin (falling back to 1500
 * when it is not set), fetches that many links from the LinkDAO, expands each valid one
 * via untinyurl(), and reports how many expansions succeeded and how many failed.
 *
 * @TODO Set limit on total number of links to expand per crawler run in the plugin settings, for now 1500
 */
public function crawl() {
    $logger = Logger::getInstance();
    $logger->setUsername(null);
    $link_dao = DAOFactory::getDAO('LinkDAO');
    $plugin_option_dao = DAOFactory::getDAO('PluginOptionDAO');
    $options = $plugin_option_dao->getOptionsHash('expandurls', true);

    if (isset($options['links_to_expand']->option_value)) {
        $total_links_to_expand = (int)$options['links_to_expand']->option_value;
    } else {
        $total_links_to_expand = 1500;
    }

    $pending_links = $link_dao->getLinksToExpand($total_links_to_expand);
    $pending_count = count($pending_links);
    $logger->logUserInfo($pending_count . " links to expand. Please wait. Working...", __METHOD__ . ',' . __LINE__);

    $expanded_count = 0;
    $error_count = 0;
    foreach ($pending_links as $short_url) {
        if (!Utils::validateURL($short_url)) {
            $error_count++;
            $logger->logError($short_url . " is not a valid URL; skipping expansion", __METHOD__ . ',' . __LINE__);
            continue;
        }

        $logger->logInfo("Expanding " . ($expanded_count + 1) . " of " . $pending_count . " (" . $short_url . ")",
            __METHOD__ . ',' . __LINE__);
        $long_url = self::untinyurl($short_url, $link_dao);
        if ($long_url == '') {
            // An empty result counts as a failed expansion.
            $error_count++;
            continue;
        }
        $link_dao->saveExpandedUrl($short_url, $long_url);
        $expanded_count++;
    }

    $logger->logUserSuccess($expanded_count . " URLs successfully expanded (" . $error_count . " errors).",
        __METHOD__ . ',' . __LINE__);
}
/**
 * Expand all unexpanded URLs.
 *
 * Each link returned by the LinkDAO is validated, expanded via untinyurl(), and saved
 * when the expansion is non-empty. Invalid links and empty expansions are counted as
 * errors; the totals are logged at the end.
 *
 * @param $total_links_to_expand The number of links to expand (passed to LinkDAO::getLinksToExpand())
 */
public function expandRemainingURLs($total_links_to_expand) {
    $logger = Logger::getInstance();
    $link_dao = DAOFactory::getDAO('LinkDAO');

    $pending_links = $link_dao->getLinksToExpand($total_links_to_expand);
    $pending_count = count($pending_links);
    $logger->logUserInfo($pending_count . " links to expand. Please wait. Working...", __METHOD__ . ',' . __LINE__);

    $expanded_count = 0;
    $error_count = 0;
    foreach ($pending_links as $short_url) {
        if (!Utils::validateURL($short_url)) {
            $error_count++;
            $logger->logError($short_url . " is not a valid URL; skipping expansion", __METHOD__ . ',' . __LINE__);
            continue;
        }

        $logger->logInfo("Expanding " . ($expanded_count + 1) . " of " . $pending_count . " (" . $short_url . ")",
            __METHOD__ . ',' . __LINE__);
        $long_url = self::untinyurl($short_url, $link_dao);
        if ($long_url == '') {
            // An empty result counts as a failed expansion.
            $error_count++;
            continue;
        }
        $link_dao->saveExpandedUrl($short_url, $long_url);
        $expanded_count++;
    }

    $logger->logUserSuccess($expanded_count . " URLs successfully expanded (" . $error_count . " errors).",
        __METHOD__ . ',' . __LINE__);
}
/**
 * Save expanded version of all unexpanded URLs to data store, as well as intermediary short links.
 *
 * For every link the LinkDAO returns, redirects are followed one hop at a time until the
 * URL stops changing, comes back empty, reaches 256 or more characters, or the hop counter
 * passes self::EXPANSION_CAP. Each intermediate short link is recorded through the short
 * link DAO. flic.kr links are handed to expandFlickrThumbnail() when an API key is supplied,
 * and in that case the regular expanded-URL save is skipped for that link.
 *
 * @param $flickr_api_key Optional Flickr API key; when null, flic.kr links are treated like any other link
 */
public function expandOriginalURLs($flickr_api_key = null) {
    $flickr_prefix = 'http://flic.kr/';

    $links_to_expand = $this->link_dao->getLinksToExpand($this->link_limit);
    $link_count = count($links_to_expand);
    $this->logger->logUserInfo($link_count . " links to expand. Please wait. Working...",
        __METHOD__ . ',' . __LINE__);

    $total_expanded = 0;
    $total_errors = 0;

    foreach ($links_to_expand as $index => $link) {
        // The Flickr flag is per-link state, so it starts out cleared on every iteration.
        $handled_as_flickr = false;

        if (!Utils::validateURL($link->url)) {
            $this->logger->logError($link->url . " not a valid URL", __METHOD__ . ',' . __LINE__);
            $this->link_dao->saveExpansionError($link->url, "Invalid URL");
            $total_errors++;
            continue;
        }

        $this->logger->logInfo("Expanding " . ($total_expanded + 1) . " of " . $link_count . " (" . $link->url . ")",
            __METHOD__ . ',' . __LINE__);

        //make sure shortened short links--like t.co--get fully expanded
        $hops = 0;
        $done = false;
        $current_url = $link->url;
        do {
            //begin Flickr thumbnail processing
            if (isset($flickr_api_key)
                && substr($current_url, 0, strlen($flickr_prefix)) == $flickr_prefix) {
                self::expandFlickrThumbnail($flickr_api_key, $current_url, $link->url);
                $handled_as_flickr = true;
                $done = true;
            }
            //end Flickr thumbnail processing

            $resolved_url = URLExpander::expandURL($current_url, $link->url, $index, $link_count,
                $this->link_dao, $this->logger);

            $chain_ended = $resolved_url == $current_url
                || $resolved_url == ''
                || $hops > self::EXPANSION_CAP;
            if ($chain_ended) {
                $done = true;
            } else {
                // The URL changed, so the one just left behind was an intermediate short link.
                try {
                    $this->short_link_dao->insert($link->id, $current_url);
                } catch (DataExceedsColumnWidthException $e) {
                    $this->logger->logError($current_url . " short link record exceeds column width, cannot save",
                        __METHOD__ . ',' . __LINE__);
                    $done = true;
                }
            }

            if (strlen($resolved_url) >= 256) {
                // Too long to carry forward as the next short link; stop here.
                $done = true;
            } else {
                $current_url = $resolved_url;
            }
            $hops++;
        } while (!$done);

        if ($handled_as_flickr) {
            continue;
        }

        if ($resolved_url == '') {
            $this->logger->logError($link->url . " not a valid URL - relocates to nowhere",
                __METHOD__ . ',' . __LINE__);
            $this->link_dao->saveExpansionError($link->url, "Invalid URL - relocates to nowhere");
            $total_errors++;
            continue;
        }

        $image_src = URLProcessor::getImageSource($resolved_url);
        $url_details = URLExpander::getWebPageDetails($resolved_url);
        try {
            $this->link_dao->saveExpandedUrl($link->url, $resolved_url, $url_details['title'], $image_src,
                $url_details['description']);
            $total_expanded++;
        } catch (DataExceedsColumnWidthException $e) {
            $this->logger->logError($link->url . " record exceeds column width, cannot save",
                __METHOD__ . ',' . __LINE__);
            $this->link_dao->saveExpansionError($link->url, "URL exceeds column width");
            $total_errors++;
        }
    }

    $this->logger->logUserSuccess($total_expanded . " URLs successfully expanded (" . $total_errors . " errors).",
        __METHOD__ . ',' . __LINE__);
}
/**
 * Save expanded version of all unexpanded URLs to data store.
 *
 * Each link returned by the LinkDAO is validated and then expanded repeatedly via
 * untinyurl() so that shortened short links (like t.co) are followed to their end.
 * Expansion of a link stops when the URL no longer changes, when it comes back empty,
 * or when the hop limit below is reached. The hop limit keeps a cyclic redirect chain
 * (A -> B -> A) from looping forever; when it is hit, the last URL reached is saved.
 *
 * Invalid links are recorded with saveExpansionError(); empty expansions are only counted.
 */
public function expandRemainingURLs() {
    // Upper bound on untinyurl() calls per link; guards against redirect cycles.
    $max_redirect_hops = 10;

    $logger = Logger::getInstance();
    $link_dao = DAOFactory::getDAO('LinkDAO');
    $links_to_expand = $link_dao->getLinksToExpand($this->link_limit);
    $link_count = count($links_to_expand);

    $logger->logUserInfo($link_count . " links to expand. Please wait. Working...", __METHOD__ . ',' . __LINE__);

    $total_expanded = 0;
    $total_errors = 0;
    foreach ($links_to_expand as $index => $link) {
        if (Utils::validateURL($link)) {
            $logger->logInfo("Expanding " . ($total_expanded + 1) . " of " . $link_count . " (" . $link . ")",
                __METHOD__ . ',' . __LINE__);

            //make sure shortened short links--like t.co--get fully expanded
            $fully_expanded = false;
            $short_link = $link;
            $hops = 0;
            while (!$fully_expanded) {
                $expanded_url = self::untinyurl($short_link, $link_dao, $link, $index, $link_count);
                $hops++;
                if ($expanded_url == $short_link || $expanded_url == '') {
                    $fully_expanded = true;
                } elseif ($hops >= $max_redirect_hops) {
                    // Still redirecting after the allowed number of hops: most likely a cycle.
                    $logger->logError($link . " still redirecting after " . $max_redirect_hops .
                        " hops; stopping expansion", __METHOD__ . ',' . __LINE__);
                    $fully_expanded = true;
                }
                $short_link = $expanded_url;
            }

            if ($expanded_url != '') {
                $image_src = URLProcessor::getImageSource($expanded_url);
                $link_dao->saveExpandedUrl($link, $expanded_url, '', $image_src);
                $total_expanded = $total_expanded + 1;
            } else {
                $total_errors = $total_errors + 1;
            }
        } else {
            $total_errors = $total_errors + 1;
            $logger->logError($link . " not a valid URL", __METHOD__ . ',' . __LINE__);
            $link_dao->saveExpansionError($link, "Invalid URL");
        }
    }

    $logger->logUserSuccess($total_expanded . " URLs successfully expanded (" . $total_errors . " errors).",
        __METHOD__ . ',' . __LINE__);
}
/**
 * Save expanded version of all unexpanded URLs to data store, as well as intermediary short links.
 *
 * For every link the LinkDAO returns, redirects are followed one hop at a time through
 * URLExpander::expandURL(). Each URL left behind on the way is recorded through the short
 * link DAO. Expansion of a link stops when the URL no longer changes, when it comes back
 * empty, or when the hop limit below is reached. The hop limit keeps a cyclic redirect
 * chain (A -> B -> A) from looping forever and from inserting short-link rows without end;
 * when it is hit, the last URL reached is saved.
 *
 * flic.kr links are handed to expandFlickrThumbnail() when an API key is supplied, and in
 * that case the regular expanded-URL save is skipped for that link.
 *
 * @param $flickr_api_key Optional Flickr API key; when null, flic.kr links are treated like any other link
 */
public function expandOriginalURLs($flickr_api_key = null) {
    // Upper bound on expandURL() calls per link; guards against redirect cycles.
    $max_redirect_hops = 10;

    $links_to_expand = $this->link_dao->getLinksToExpand($this->link_limit);
    $link_count = count($links_to_expand);

    $this->logger->logUserInfo($link_count . " links to expand. Please wait. Working...",
        __METHOD__ . ',' . __LINE__);

    $total_expanded = 0;
    $total_errors = 0;
    $has_expanded_flickr_link = false;
    foreach ($links_to_expand as $index => $link) {
        if (Utils::validateURL($link->url)) {
            $this->logger->logInfo("Expanding " . ($total_expanded + 1) . " of " . $link_count .
                " (" . $link->url . ")", __METHOD__ . ',' . __LINE__);

            //make sure shortened short links--like t.co--get fully expanded
            $fully_expanded = false;
            $short_link = $link->url;
            $hops = 0;
            while (!$fully_expanded) {
                //begin Flickr thumbnail processing
                if (isset($flickr_api_key)
                    && substr($short_link, 0, strlen('http://flic.kr/')) == 'http://flic.kr/') {
                    self::expandFlickrThumbnail($flickr_api_key, $short_link, $link->url);
                    $has_expanded_flickr_link = true;
                    $fully_expanded = true;
                }
                //end Flickr thumbnail processing

                $expanded_url = URLExpander::expandURL($short_link, $link->url, $index, $link_count,
                    $this->link_dao, $this->logger);
                $hops++;

                if ($expanded_url == $short_link || $expanded_url == '') {
                    $fully_expanded = true;
                } elseif ($hops >= $max_redirect_hops) {
                    // Still redirecting after the allowed number of hops: most likely a cycle.
                    $this->logger->logError($link->url . " still redirecting after " . $max_redirect_hops .
                        " hops; stopping expansion", __METHOD__ . ',' . __LINE__);
                    $fully_expanded = true;
                } else {
                    // The URL changed, so the one just left behind was an intermediate short link.
                    $this->short_link_dao->insert($link->id, $short_link);
                }
                $short_link = $expanded_url;
            }

            if (!$has_expanded_flickr_link) {
                if ($expanded_url != '') {
                    $image_src = URLProcessor::getImageSource($expanded_url);
                    $this->link_dao->saveExpandedUrl($link->url, $expanded_url, '', $image_src);
                    $total_expanded = $total_expanded + 1;
                } else {
                    $this->logger->logError($link->url . " not a valid URL - relocates to nowhere",
                        __METHOD__ . ',' . __LINE__);
                    $this->link_dao->saveExpansionError($link->url, "Invalid URL - relocates to nowhere");
                    $total_errors = $total_errors + 1;
                }
            }
        } else {
            $total_errors = $total_errors + 1;
            $this->logger->logError($link->url . " not a valid URL", __METHOD__ . ',' . __LINE__);
            $this->link_dao->saveExpansionError($link->url, "Invalid URL");
        }
        // The Flickr flag is per-link state; clear it before the next link.
        $has_expanded_flickr_link = false;
    }

    $this->logger->logUserSuccess($total_expanded . " URLs successfully expanded (" . $total_errors . " errors).",
        __METHOD__ . ',' . __LINE__);
}