/**
 * Set up
 *
 * Requests the Config, Webapp and Crawler instances, then runs the parent set-up.
 */
function setUp() {
    // The returned objects are not used here; only the getInstance() calls matter.
    Config::getInstance();
    Webapp::getInstance();
    Crawler::getInstance();

    parent::setUp();
}
/**
 * Authorize the caller and, if authorized, run the crawler.
 *
 * When more than one CLI argument is present, argv[1] is used as the username and the
 * password is taken from argv[2] or, failing that, from the THINKUP_PASSWORD environment
 * variable. Otherwise the caller must already be logged in.
 *
 * @return string Empty string when the crawl was launched, otherwise an error message
 */
public function control() {
    $output = "";
    $authorized = false;

    $has_cli_credentials = isset($this->argc) && $this->argc > 1;
    if (!$has_cli_credentials) {
        // web request: the user must already be logged in
        if ($this->isLoggedIn()) {
            $authorized = true;
        } else {
            $output = "ERROR: Invalid or missing username and password.";
        }
    } else {
        // command line: check the supplied credentials
        $session = new Session();
        $username = $this->argv[1];
        $password = ($this->argc > 2) ? $this->argv[2] : getenv('THINKUP_PASSWORD');

        $owner_dao = DAOFactory::getDAO('OwnerDAO');
        $owner = $owner_dao->getByEmail($username);
        if ($owner_dao->isOwnerAuthorized($username, $password)) {
            $authorized = true;
            Session::completeLogin($owner);
        } else {
            $output = "ERROR: Incorrect username and password.";
        }
    }

    if ($authorized) {
        Crawler::getInstance()->crawl();
    }
    return $output;
}
/**
 * Crawl with a dummy Flickr API key stored as a plugin option and check the resulting
 * expanded URL / error values of links 43, 42 and 41.
 */
public function testFlickrCrawl() {
    // Fixture handles are kept in locals for the duration of the test.
    $builders = $this->buildData();
    $crawler = Crawler::getInstance();
    $config = Config::getInstance();

    //use fake Flickr API key
    $plugin_fixture = FixtureBuilder::build('plugins', array('id'=>'2', 'folder_name'=>'flickrthumbnails'));
    $option_fixture = FixtureBuilder::build('options', array(
        'namespace' => OptionDAO::PLUGIN_OPTIONS . '-2',
        'option_name' => 'flickr_api_key',
        'option_value' => 'dummykey'
    ));

    $this->simulateLogin('*****@*****.**', true);
    $crawler->crawl();

    $link_dao = DAOFactory::getDAO('LinkDAO');

    // link 43 is expected to expand cleanly
    $expanded = $link_dao->getLinkById(43);
    $this->assertEqual($expanded->expanded_url,
        'http://farm3.static.flickr.com/2755/4488149974_04d9558212_m.jpg');
    $this->assertEqual($expanded->error, '');

    // links 42 and 41 are expected to record an API failure and stay unexpanded
    foreach (array(42, 41) as $failed_id) {
        $failed = $link_dao->getLinkById($failed_id);
        $this->assertEqual($failed->expanded_url, '');
        $this->assertEqual($failed->error, 'No response from Flickr API');
    }
}
/**
 * Set up
 *
 * Runs the parent set-up, stores the Webapp, Crawler and Logger instances on the test,
 * and registers/activates the Twitter plugin.
 */
public function setUp() {
    parent::setUp();

    $webapp = Webapp::getInstance();
    $this->webapp = $webapp;
    $crawler = Crawler::getInstance();
    $this->crawler = $crawler;

    $webapp->registerPlugin('twitter', 'TwitterPlugin');
    $crawler->registerCrawlerPlugin('TwitterPlugin');
    $webapp->setActivePlugin('twitter');

    $this->logger = Logger::getInstance();
}
/**
 * Crawling without a simulated login is expected to raise UnauthorizedUserException.
 */
public function testCrawlUnauthorized() {
    $builders = $this->buildData();

    $crawler = Crawler::getInstance();
    $crawler->registerPlugin('hellothinkup', 'HelloThinkUpPlugin');
    $crawler->registerCrawlerPlugin('HelloThinkUpPlugin');

    $expected = new UnauthorizedUserException('You need a valid session to launch the crawler.');
    $this->expectException($expected);
    $crawler->crawl();

    // NOTE(review): if crawl() throws as expected, execution leaves this method before the
    // next line, so this assertion probably never runs - confirm whether it is needed.
    $this->assertNoErrors();
}
/**
 * Run a crawl and verify that link 1 was expanded without an error.
 */
function testExpandURLsCrawl() {
    Crawler::getInstance()->crawl();

    //the crawler closes the log so we have to re-open it
    $logger = Logger::getInstance();

    $link_dao = DAOFactory::getDAO('LinkDAO');
    $first_link = $link_dao->getLinkById(1);

    $this->assertEqual(
        $first_link->expanded_url,
        'http://www.thewashingtonnote.com/archives/2010/04/communications/'
    );
    $this->assertEqual($first_link->error, '');
}
/**
 * Test Crawler->crawl
 *
 * Registers a plugin class without a crawl method and expects crawl() to raise an exception.
 */
public function testCrawl() {
    $crawler = Crawler::getInstance();

    $crawler->registerPlugin('nonexistent', 'TestFauxPluginOne');
    $crawler->registerCrawlerPlugin('TestFauxPluginOne');

    $expected = new Exception("The TestFauxPluginOne object does not have a crawl method.");
    $this->expectException($expected);
    $crawler->crawl();

    // NOTE(review): when the crawl() call above throws as expected, nothing below this
    // point executes - confirm whether these checks belong in a separate test.
    $crawler->registerPlugin('hellothinktank', 'HelloThinkTankPlugin');
    $crawler->registerCrawlerPlugin('HelloThinkTankPlugin');

    $plugin_object = $crawler->getPluginObject("hellothinktank");
    $this->assertEqual($plugin_object, "HelloThinkTankPlugin");

    $crawler->crawl();
}
/**
 * Set up
 *
 * Runs the parent set-up, registers the tests/ directories with the Loader, switches off
 * page caching and the profiler, applies the configured timezone (if any), and requests
 * the Webapp and Crawler instances.
 */
public function setUp() {
    parent::setUp();

    $test_paths = array(
        THINKUP_ROOT_PATH . 'tests/',
        THINKUP_ROOT_PATH . 'tests/classes/',
        THINKUP_ROOT_PATH . 'tests/fixtures/'
    );
    Loader::register($test_paths);

    $config = Config::getInstance();
    //disable caching for tests
    $config->setValue('cache_pages', false);
    //tests assume profiling is off
    $config->setValue('enable_profiler', false);

    $timezone = $config->getValue('timezone');
    if ($timezone) {
        date_default_timezone_set($timezone);
    }

    Webapp::getInstance();
    Crawler::getInstance();
}
/**
 * Crawl as a logged-in user and check one successfully expanded link (id 1) and one
 * link whose expansion failed (id 2).
 */
public function testExpandURLsCrawl() {
    // Fixture handles are kept in a local for the duration of the test.
    $builders = $this->buildData();
    $this->simulateLogin('*****@*****.**', true);

    Crawler::getInstance()->crawl();

    //the crawler closes the log so we have to re-open it
    $logger = Logger::getInstance();

    $link_dao = DAOFactory::getDAO('LinkDAO');

    $expanded = $link_dao->getLinkById(1);
    $this->assertEqual(
        $expanded->expanded_url,
        'http://www.thewashingtonnote.com/archives/2010/04/communications/'
    );
    $this->assertEqual($expanded->error, '');

    $failed = $link_dao->getLinkById(2);
    $this->assertEqual($failed->expanded_url, '');
    $this->assertEqual($failed->error, 'Error expanding URL');
}
/**
 * Set up
 *
 * Runs the parent set-up, registers the tests/ directories with the Loader, switches off
 * page caching and the profiler, applies the configured timezone (if any), requests the
 * Webapp and Crawler instances, sets the DEBUG flag from the TEST_DEBUG environment
 * variable and finally calls isTestEnvironmentReady().
 */
public function setUp() {
    parent::setUp();

    $test_paths = array(
        THINKUP_ROOT_PATH . 'tests/',
        THINKUP_ROOT_PATH . 'tests/classes/',
        THINKUP_ROOT_PATH . 'tests/fixtures/'
    );
    Loader::register($test_paths);

    $config = Config::getInstance();
    //disable caching for tests
    $config->setValue('cache_pages', false);
    //tests assume profiling is off
    $config->setValue('enable_profiler', false);

    $timezone = $config->getValue('timezone');
    if ($timezone) {
        date_default_timezone_set($timezone);
    }

    Webapp::getInstance();
    Crawler::getInstance();

    // getenv() returns false when the variable is not set; any set value enables debugging
    $this->DEBUG = (getenv('TEST_DEBUG') !== false);
    self::isTestEnvironmentReady();
}
/**
 * Run the crawler and stream its log.
 *
 * For API calls the response is JSON ({"result":"success"}); otherwise the top template
 * is echoed, the crawl log is echoed to the page and the bottom template is echoed.
 * Passing log=full in the query string switches the logger to Logger::ALL_MSGS.
 *
 * @throws CrawlerLockedException Re-thrown for API calls only
 */
public function authControl() {
    Utils::defineConstants();

    if (!$this->isAPICall()) {
        $this->setContentType('text/html; charset=UTF-8');
        $this->setViewTemplate('crawler.run-top.tpl');
        echo $this->generateView();

        $config = Config::getInstance();
        //this forces output to just echo to page
        $config->setValue('log_location', false);
        $logger = Logger::getInstance();
        $logger->close();
    } else {
        // If the request comes from an API call, output JSON instead of HTML
        $this->setContentType('application/json; charset=UTF-8');
    }

    try {
        $logger = Logger::getInstance();

        $wants_full_log = isset($_GET['log']) && $_GET['log'] == 'full';
        if (!$wants_full_log) {
            $logger->setVerbosity(Logger::USER_MSGS);
            $logger->enableHTMLOutput();
        } else {
            $logger->setVerbosity(Logger::ALL_MSGS);
            echo '<pre style="font-family:Courier;font-size:10px;">';
        }

        $crawler = Crawler::getInstance();
        //close session so that it's not locked by long crawl
        session_write_close();
        $crawler->crawl();
        $logger->close();
    } catch (CrawlerLockedException $e) {
        if ($this->isAPICall()) {
            // Will be caught and handled in ThinkUpController::go()
            throw $e;
        }
        // Will appear in the HTML page
        echo $e->getMessage();
    }

    if (!$this->isAPICall()) {
        $this->setViewTemplate('crawler.run-bottom.tpl');
        echo $this->generateView();
    } else {
        echo json_encode((object) array('result' => 'success'));
    }
}
function testFlickrCrawl() { $crawler = Crawler::getInstance(); $config = Config::getInstance(); //use fake Flickr API key $config->setValue('flickr_api_key', 'dummykey'); $crawler->crawl(); $ldao = DAOFactory::getDAO('LinkDAO'); $link = $ldao->getLinkById(43); $this->assertEqual($link->expanded_url, 'http://farm3.static.flickr.com/2755/4488149974_04d9558212_m.jpg'); $this->assertEqual($link->error, ''); $link = $ldao->getLinkById(42); $this->assertEqual($link->expanded_url, ''); $this->assertEqual($link->error, 'No response from Flickr API'); $link = $ldao->getLinkById(41); $this->assertEqual($link->expanded_url, ''); $this->assertEqual($link->error, 'No response from Flickr API'); }
/**
 * Run the crawler and show its log.
 *
 * For API calls the response is JSON ({"result":"success"}). Otherwise the top template
 * is echoed together with a success message and a hint describing the RSS feed and a
 * cron command, the crawl output is echoed inside a textarea, and the bottom template
 * is echoed.
 *
 * @throws CrawlerLockedException Re-thrown for API calls only
 */
public function authControl() {
    Utils::defineConstants();

    if (!$this->isAPICall()) {
        $this->setPageTitle("ThinkUp Crawler");
        $this->setViewTemplate('crawler.run-top.tpl');

        // Use the php binary reported by `which`, falling back to plain "php"
        $whichphp = exec('which php');
        if (empty($whichphp)) {
            $php_path = 'php';
        } else {
            $php_path = $whichphp;
        }

        $success_message = 'ThinkUp has just started to collect your posts. This is going to take a little '
            . 'while, but if you want to see the technical details of what\'s going on, there\'s a log below. ';
        $this->addSuccessMessage($success_message);

        $rss_url = THINKUP_BASE_URL . 'rss.php?'
            . ThinkUpAuthAPIController::getAuthParameters($this->getLoggedInUser());

        $hint = '<b>Hint</b><br />You can automate ThinkUp crawls by subscribing to '
            . '<strong><a href="' . $rss_url . '" target="_blank">this RSS feed</a></strong> '
            . 'in your favorite RSS reader.<br /><br /> Alternately, use the command below to set up a cron job that '
            . 'runs hourly to update your posts. (Be sure to change yourpassword to your real password!)<br /><br />'
            . '<code style="font-family:Courier">cd ' . THINKUP_WEBAPP_PATH
            . 'crawler/;export THINKUP_PASSWORD=yourpassword; ' . $php_path . ' crawl.php '
            . $this->getLoggedInUser() . '</code>';
        $this->addInfoMessage($hint);

        echo $this->generateView();
        echo '<br /><br /><textarea rows="65" cols="110">';

        $config = Config::getInstance();
        //this forces output to just echo to page
        $config->setValue('log_location', false);
        $logger = Logger::getInstance();
        $logger->close();

        // Will make sure any exception caught below appears as plain text, and not as HTML
        $this->setContentType('text/plain; charset=UTF-8');
    } else {
        // If the request comes from an API call, output JSON instead of HTML
        $this->setContentType('application/json; charset=UTF-8');
    }

    try {
        $crawler = Crawler::getInstance();
        $crawler->crawl();
    } catch (CrawlerLockedException $e) {
        if ($this->isAPICall()) {
            // Will be caught and handled in ThinkUpController::go()
            throw $e;
        }
        // Will appear in the textarea of the HTML page
        echo $e->getMessage();
    }

    if (!$this->isAPICall()) {
        echo '</textarea>';
        $this->setViewTemplate('crawler.run-bottom.tpl');
        echo $this->generateView();
    } else {
        echo json_encode((object) array('result' => 'success'));
    }
}
Author: Gina Trapani */
/**
 *
 * ThinkUp/webapp/plugins/expandurls/controller/expandurls.php
 *
 * Copyright (c) 2009-2011 Gina Trapani
 *
 * LICENSE:
 *
 * This file is part of ThinkUp (http://thinkupapp.com).
 *
 * ThinkUp is free software: you can redistribute it and/or modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any
 * later version.
 *
 * ThinkUp is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along with ThinkUp. If not, see
 * <http://www.gnu.org/licenses/>.
 *
 * @author Gina Trapani <ginatrapani[at]gmail[dot]com>
 * @license http://www.gnu.org/licenses/gpl.html
 * @copyright 2009-2011 Gina Trapani
 */
// Register ExpandURLsPlugin with the Webapp under the name 'expandurls' ...
$webapp = Webapp::getInstance();
$webapp->registerPlugin('expandurls', 'ExpandURLsPlugin');
// ... and with the Crawler as a crawler plugin.
$crawler = Crawler::getInstance();
$crawler->registerCrawlerPlugin('ExpandURLsPlugin');
/**
 * Crawl as a logged-in user and verify the geo-encoding results written to the posts
 * and to the encoded-locations table.
 *
 * Each section below loads one post by id and checks its geo, place, location,
 * is_geo_encoded and (where relevant) reply_retweet_distance values. The last section
 * checks the rows returned by LocationDAO::getAllLocations().
 *
 * The expected place 'Sector 8, R.K. Puram, New Delhi' is written as a single-line
 * literal everywhere; a raw line break inside the quotes would become part of the
 * expected value.
 */
function testGeoEncoderCrawl() {
    // Fixture handles are kept in a local for the duration of the test.
    $builders = $this->buildData();
    $this->simulateLogin('*****@*****.**', true);
    $crawler = Crawler::getInstance();
    $crawler->crawl();

    //the crawler closes the log so we have to re-open it
    $logger = Logger::getInstance();
    $pdao = DAOFactory::getDAO('PostDAO');
    $ldao = DAOFactory::getDAO('LocationDAO');

    // Test 1: Checking Post for Successful Reverse Geoencoding
    $this->assertTrue($pdao->isPostInDB(15645300636.0, 'twitter'));
    $post = $pdao->getPost(15645300636.0, 'twitter');
    $this->assertEqual($post->is_geo_encoded, 1);
    $this->assertEqual($post->geo, '28.602815,77.049136');
    $this->assertEqual($post->location, 'Sector 4, New Delhi, Haryana, India');
    // Since this is just a post, reply_retweet_distance is 0
    $this->assertEqual($post->reply_retweet_distance, 0);

    // Test 2: Checking Post for successful Reverse Geoencoding
    $post = $pdao->getPost(15219161227.0, 'twitter');
    $this->assertEqual($post->is_geo_encoded, 1);
    $this->assertEqual($post->geo, '28.56213,77.165297');
    $this->assertEqual($post->location, 'Vasant Vihar, Munirka, New Delhi, Delhi, India');

    // Test: Example of unsuccessful geoencoding resulting out of INVALID_REQUEST.
    // NOTE: Not a test case encountered in actual crawl
    $post = $pdao->getPost(15331235880.0, 'twitter');
    $this->assertEqual($post->is_geo_encoded, 5);
    $this->assertEqual($post->geo, '28.60abc2815 77.049136');

    // Test 1: Checking Post for successful Geoencoding using "place" field
    $post = $pdao->getPost(15052338902.0, 'twitter');
    $this->assertEqual($post->is_geo_encoded, 1);
    $this->assertEqual($post->geo, '28.6889398,77.1618859');
    $this->assertEqual($post->place, 'Sector 8, R.K. Puram, New Delhi');
    $this->assertEqual($post->location,
        'Keshav Puram Metro Station, Maharaja Nahar Singh Marg, New Delhi, Delhi, India');
    // Distance between main post and its reply (Geocoding Process)
    $this->assertEqual($post->reply_retweet_distance, 1161);

    // Test 2: Checking Post for successful Geoencoding using "place" field
    // This post is retrieved from tu_encoded_locations
    $post = $pdao->getPost(14914043658.0, 'twitter');
    $this->assertEqual($post->is_geo_encoded, 1);
    $this->assertEqual($post->place, 'Sector 8, R.K. Puram, New Delhi');
    $this->assertEqual($post->location,
        'Keshav Puram Metro Station, Maharaja Nahar Singh Marg, New Delhi, Delhi, India');
    // When reply is Not in DB, reply_retweet_distance is -1
    $this->assertFalse($pdao->isPostInDB(999999, 'twitter'));
    $this->assertEqual($post->reply_retweet_distance, -1);

    // Test 1: Checking Post for successful Geoencoding using "location" field
    // (post had is_geo_encoded set to 3)
    $post = $pdao->getPost(15338041815.0, 'twitter');
    $this->assertEqual($post->geo, '19.017656,72.856178');
    $this->assertEqual($post->place, NULL);
    $this->assertEqual($post->location, 'Mumbai, Maharashtra, India');
    $this->assertEqual($post->is_geo_encoded, 1);

    // Test 2: Checking Post for successful Geoencoding using "location" field
    $post = $pdao->getPost(15344199472.0, 'twitter');
    $this->assertEqual($post->location, 'New Delhi, Delhi, India');
    $this->assertEqual($post->is_geo_encoded, 1);
    // Distance between Post and Retweet (Geocoding Process)
    $this->assertEqual($post->reply_retweet_distance, 18);

    // When all three fields are filled, <geo> is given the most preference
    $post = $pdao->getPost(11259110570.0, 'twitter');
    $this->assertEqual($post->geo, '28.56213,77.165297');
    $this->assertEqual($post->place, 'Sector 8, R.K. Puram, New Delhi');
    $this->assertEqual($post->location, 'Vasant Vihar, Munirka, New Delhi, Delhi, India');
    $this->assertEqual($post->is_geo_encoded, 1);
    // Distance between reply and post (Reverse Geocoding Process)
    $this->assertEqual($post->reply_retweet_distance, 14);

    // When only place and location are filled, <place> is given preference
    $post = $pdao->getPost(15052338902.0, 'twitter');
    $this->assertEqual($post->geo, '28.6889398,77.1618859');
    $this->assertEqual($post->place, 'Sector 8, R.K. Puram, New Delhi');
    $this->assertEqual($post->location,
        'Keshav Puram Metro Station, Maharaja Nahar Singh Marg, New Delhi, Delhi, India');
    $this->assertEqual($post->is_geo_encoded, 1);

    // Unsuccessful Geoencoding due to place field
    // NOTE: Not a test case encountered in real crawl
    $post = $pdao->getPost(14913946516.0, 'twitter');
    $this->assertEqual($post->geo, NULL);
    $this->assertEqual($post->place, 'abc');
    $this->assertEqual($post->location, 'New Delhi');
    $this->assertEqual($post->is_geo_encoded, 2);
    $this->assertEqual($post->reply_retweet_distance, 0);

    //Unsuccessful Geoencoding due to location field
    $post = $pdao->getPost(15268690400.0, 'twitter');
    $this->assertEqual($post->geo, NULL);
    $this->assertEqual($post->place, NULL);
    $this->assertEqual($post->location, 'abc');
    $this->assertEqual($post->is_geo_encoded, 2);

    //Unsuccessful Geoencoding due to location field resulting in INVALID_REQUEST
    $post = $pdao->getPost(15244973830.0, 'twitter');
    $this->assertEqual($post->location, 'Ü');
    $this->assertEqual($post->is_geo_encoded, 5);

    //Unsuccessful Geoencoding due to all three fields being empty
    $post = $pdao->getPost(15435434230.0, 'twitter');
    $this->assertEqual($post->geo, NULL);
    $this->assertEqual($post->place, NULL);
    $this->assertEqual($post->location, NULL);
    $this->assertEqual($post->is_geo_encoded, 6);

    //Reverse Geoencoding when latitude and longitude are found in location field instead of geo field
    $post = $pdao->getPost(13212618909.0, 'twitter');
    $this->assertEqual($post->geo, '40.681839,-73.983734');
    $this->assertEqual($post->place, NULL);
    $this->assertEqual($post->location, 'Boerum Hill, Brooklyn, NY, USA');
    $this->assertEqual($post->is_geo_encoded, 1);
    // Retweet Distance in case of Reverse Geocoding Process
    $this->assertEqual($post->reply_retweet_distance, 11760);

    //Unsuccessful Geoencoding due to REQUEST_DENIED
    $post = $pdao->getPost(12259110570.0, 'twitter');
    $this->assertEqual($post->place, 'request_denied');
    $this->assertEqual($post->is_geo_encoded, 4);

    //Unsuccessful Geoencoding due to OVER_QUERY_LIMIT
    $post = $pdao->getPost(13259110570.0, 'twitter');
    $this->assertEqual($post->place, 'over_query_limit');
    $this->assertEqual($post->is_geo_encoded, 3);

    //After reaching OVER_QUERY_LIMIT, next posts are not geoencoded
    $post = $pdao->getPost(15645301636.0, 'twitter');
    $this->assertEqual($post->is_geo_encoded, 0);
    $post = $pdao->getPost(11331235880.0, 'twitter');
    $this->assertEqual($post->is_geo_encoded, 0);

    // Check up filling of tu_encoded_locations table
    $locations = $ldao->getAllLocations();
    $this->assertEqual(count($locations), 6);
    $this->assertEqual($locations[0]['short_name'], "28.602815 77.049136");
    $this->assertEqual($locations[2]['short_name'], "Mumbai");
    $this->assertEqual($locations[5]['short_name'], "40.681839 -73.983734");
}
/**
 * Crawl with the expandurls plugin fixture in place and check the Instagram links
 * 43, 42 and 41.
 */
public function testExpandInstagramImageURLs() {
    // Fixture handles are kept in locals for the duration of the test.
    $builders = $this->buildInstagramData();
    $crawler = Crawler::getInstance();
    $config = Config::getInstance();

    $plugin_fixture = FixtureBuilder::build('plugins', array('id' => '2', 'folder_name' => 'expandurls'));
    $option_fixture = FixtureBuilder::build('options', array(
        'namespace' => OptionDAO::PLUGIN_OPTIONS . '-2',
        'option_name' => 'flickr_api_key',
        'option_value' => 'dummykey'
    ));

    $this->simulateLogin('*****@*****.**', true);
    $crawler->crawl();

    $link_dao = DAOFactory::getDAO('LinkDAO');

    $link = $link_dao->getLinkById(43);
    //Instagr.am constantly changes the location of their images so it's an unpredictable assertion
    // $this->assertEqual($link->expanded_url,
    // 'http://images.instagram.com/media/2010/12/20/f0f411210cc54353be07cf74ceb79f3b_7.jpg');
    $this->assertEqual($link->error, '');

    // expected expanded URLs, checked in the same order as before: 42 first, then 41
    $expected_urls = array(
        42 => 'http://instagr.am/41/media/',
        41 => 'http://instagr.am/40/media/'
    );
    foreach ($expected_urls as $link_id => $expected_url) {
        $link = $link_dao->getLinkById($link_id);
        $this->assertEqual($link->expanded_url, $expected_url);
    }
}
/**
 * Crawling while the configured application version is ahead of the database is
 * expected to raise InstallerException.
 *
 * The test bumps THINKUP_VERSION in the Config instance and restores the initial value
 * afterwards. Because crawl() is expected to throw, the restore runs in a catch block
 * before the exception is re-thrown to the test framework; a plain statement after
 * crawl() would be skipped on the expected path.
 */
public function testCrawlUpgrading() {
    // up app version
    $config = Config::getInstance();
    $init_db_version = $config->getValue('THINKUP_VERSION');
    $config->setValue('THINKUP_VERSION', $config->getValue('THINKUP_VERSION') + 10); //set a high version num

    $builders = $this->buildData();

    $crawler = Crawler::getInstance();
    $crawler->registerPlugin('hellothinkup', 'HelloThinkUpPlugin');
    $crawler->registerCrawlerPlugin('HelloThinkUpPlugin');

    $this->simulateLogin('*****@*****.**', true);
    $this->expectException(
        new InstallerException('ThinkUp needs a database migration, so we are unable to run the crawler.')
    );

    try {
        $crawler->crawl();
    } catch (Exception $e) {
        // reset version before handing the exception on to the expectation
        $config->setValue('THINKUP_VERSION', $init_db_version);
        throw $e;
    }

    // reset version (only reached if crawl() unexpectedly did not throw)
    $config->setValue('THINKUP_VERSION', $init_db_version);
}
/**
 * Crawl with dummy Bitly credentials stored as plugin options and check the expanded
 * URL, title and error values of links 43, 42 and 41.
 *
 * Both option fixtures are appended to $builders, the same array that holds the
 * fixtures returned by buildBitlyData().
 */
public function testBitlyCrawl() {
    $builders = $this->buildBitlyData();
    $crawler = Crawler::getInstance();
    $config = Config::getInstance();

    //use fake Bitly API key
    $builders[] = FixtureBuilder::build('options', array(
        'namespace' => OptionDAO::PLUGIN_OPTIONS . '-4',
        'option_name' => 'bitly_api_key',
        'option_value' => 'dummykey'
    ));
    //use fake Bitly login name
    $builders[] = FixtureBuilder::build('options', array(
        'namespace' => OptionDAO::PLUGIN_OPTIONS . '-4',
        'option_name' => 'bitly_login',
        'option_value' => 'bitly123'
    ));

    $this->simulateLogin('*****@*****.**', true);
    $crawler->crawl();

    $link_dao = DAOFactory::getDAO('LinkDAO');

    $link = $link_dao->getLinkById(43);
    $this->assertEqual($link->expanded_url, 'http://static.ak.fbcdn.net/rsrc.php/zw/r/ZEKh4ZZQY74.png');
    $this->assertEqual($link->title, 'Bitly Test URL');
    $this->assertEqual($link->error, '');

    $link = $link_dao->getLinkById(42);
    $this->assertEqual($link->expanded_url,
        'http://bitly.com/a/warning?url=http%3a%2f%2fwww%2ealideas%2ecom%2f&hash=41');
    $this->assertEqual($link->error, 'No response from http://bit.ly API');

    $link = $link_dao->getLinkById(41);
    $this->debug($link->url);
    $this->assertEqual($link->expanded_url, 'http://static.ak.fbcdn.net/rsrc.php/zw/r/ZEKh4ZZQY74.png');
    $this->assertEqual($link->error, 'No response from http://bit.ly API');
}