/**
  * Prepare the fixture for a test.
  *
  * Calls getInstance() on Config, Webapp and Crawler and then hands over to the parent set-up.
  */
 function setUp()
 {
     // Only the getInstance() calls matter here; the returned objects are not used in this method.
     Config::getInstance();
     Webapp::getInstance();
     Crawler::getInstance();
     parent::setUp();
 }
 /**
  * Launch the crawler on behalf of an authorized caller.
  *
  * When more than one command-line argument is available, argv[1] is taken as the owner's email and the
  * password comes from argv[2] or, when that is absent, from the THINKUP_PASSWORD environment variable.
  * Without command-line arguments the caller has to be logged in already.
  *
  * @return string Empty string when the caller was authorized, otherwise an error message.
  */
 public function control()
 {
     $message = "";
     $may_crawl = false;
     $has_cli_args = isset($this->argc) && $this->argc > 1;
     if (!$has_cli_args) {
         // No command-line credentials: fall back to the existing login.
         if ($this->isLoggedIn()) {
             $may_crawl = true;
         } else {
             $message = "ERROR: Invalid or missing username and password.";
         }
     } else {
         // Command-line credentials were supplied.
         $session = new Session();
         $email = $this->argv[1];
         $password = $this->argc > 2 ? $this->argv[2] : getenv('THINKUP_PASSWORD');
         $dao = DAOFactory::getDAO('OwnerDAO');
         $owner = $dao->getByEmail($email);
         if ($dao->isOwnerAuthorized($email, $password)) {
             $may_crawl = true;
             Session::completeLogin($owner);
         } else {
             $message = "ERROR: Incorrect username and password.";
         }
     }
     if ($may_crawl) {
         Crawler::getInstance()->crawl();
     }
     return $message;
 }
    /**
     * Crawl with a placeholder Flickr API key and check the stored links.
     *
     * Link 43 is expected to be expanded to a static Flickr image URL with no error; links 42 and 41 are
     * expected to stay unexpanded and carry the "No response from Flickr API" error.
     */
    public function testFlickrCrawl() {
        // The fixture handles are kept in locals for the whole test.
        $fixtures = $this->buildData();

        $crawl_runner = Crawler::getInstance();
        Config::getInstance();

        // The fake Flickr API key is supplied through the plugin options fixture.
        $plugin_fixture = FixtureBuilder::build('plugins', array('id'=>'2', 'folder_name'=>'flickrthumbnails'));
        $option_fixture = FixtureBuilder::build('options', array(
            'namespace' => OptionDAO::PLUGIN_OPTIONS . '-2',
            'option_name' => 'flickr_api_key',
            'option_value' => 'dummykey') );

        $this->simulateLogin('*****@*****.**', true);
        $crawl_runner->crawl();

        $link_dao = DAOFactory::getDAO('LinkDAO');

        $expanded = $link_dao->getLinkById(43);
        $this->assertEqual($expanded->expanded_url, 'http://farm3.static.flickr.com/2755/4488149974_04d9558212_m.jpg');
        $this->assertEqual($expanded->error, '');

        // Both remaining links share the same expectations.
        foreach (array(42, 41) as $link_id) {
            $failed = $link_dao->getLinkById($link_id);
            $this->assertEqual($failed->expanded_url, '');
            $this->assertEqual($failed->error, 'No response from Flickr API');
        }
    }
 /**
  * Prepare the fixture: register TwitterPlugin with the Webapp and the Crawler, make 'twitter' the active
  * plugin, and keep the Webapp, Crawler and Logger instances on the test object.
  */
 public function setUp() {
     parent::setUp();

     $app = Webapp::getInstance();
     $this->webapp = $app;
     $crawl_runner = Crawler::getInstance();
     $this->crawler = $crawl_runner;

     $app->registerPlugin('twitter', 'TwitterPlugin');
     $crawl_runner->registerCrawlerPlugin('TwitterPlugin');
     $app->setActivePlugin('twitter');

     $this->logger = Logger::getInstance();
 }
Exemple #5
0
 /**
  * Crawling without a simulated login is expected to raise an UnauthorizedUserException.
  */
 public function testCrawlUnauthorized()
 {
     // Fixture handles are kept in a local for the whole test.
     $fixtures = $this->buildData();

     $crawl_runner = Crawler::getInstance();
     $crawl_runner->registerPlugin('hellothinkup', 'HelloThinkUpPlugin');
     $crawl_runner->registerCrawlerPlugin('HelloThinkUpPlugin');

     $expected = new UnauthorizedUserException('You need a valid session to launch the crawler.');
     $this->expectException($expected);
     $crawl_runner->crawl();

     $this->assertNoErrors();
 }
 /**
  * Run a crawl and check that link 1 was expanded to its full URL without an error.
  */
 function testExpandURLsCrawl()
 {
     Crawler::getInstance()->crawl();

     // The crawler closes the log, so it is requested again here to re-open it.
     Logger::getInstance();

     $link_dao = DAOFactory::getDAO('LinkDAO');
     $expanded = $link_dao->getLinkById(1);
     $this->assertEqual($expanded->expanded_url, 'http://www.thewashingtonnote.com/archives/2010/04/communications/');
     $this->assertEqual($expanded->error, '');
 }
Exemple #7
0
 /**
  * Test Crawler->crawl
  *
  * Registers TestFauxPluginOne as a crawler plugin and expects crawl() to raise an exception saying that the
  * object has no crawl method. HelloThinkTankPlugin is then registered, looked up and crawled.
  *
  * NOTE(review): if the first crawl() throws as expected, the statements after it presumably never run -
  * confirm whether the HelloThinkTankPlugin checks are meant to execute.
  */
 public function testCrawl()
 {
     $crawl_runner = Crawler::getInstance();

     // A plugin without a crawl method.
     $crawl_runner->registerPlugin('nonexistent', 'TestFauxPluginOne');
     $crawl_runner->registerCrawlerPlugin('TestFauxPluginOne');
     $expected = new Exception("The TestFauxPluginOne object does not have a crawl method.");
     $this->expectException($expected);
     $crawl_runner->crawl();

     // A second plugin, registered under the 'hellothinktank' folder name.
     $crawl_runner->registerPlugin('hellothinktank', 'HelloThinkTankPlugin');
     $crawl_runner->registerCrawlerPlugin('HelloThinkTankPlugin');
     $registered_class = $crawl_runner->getPluginObject("hellothinktank");
     $this->assertEqual($registered_class, "HelloThinkTankPlugin");
     $crawl_runner->crawl();
 }
 /**
  * Set up
  *
  * Runs the parent set-up, registers the test directories with the Loader, switches page caching and the
  * profiler off in Config, applies the configured timezone when one is set, and calls getInstance() on
  * Webapp and Crawler.
  */
 public function setUp()
 {
     parent::setUp();

     $test_paths = array(
         THINKUP_ROOT_PATH . 'tests/',
         THINKUP_ROOT_PATH . 'tests/classes/',
         THINKUP_ROOT_PATH . 'tests/fixtures/'
     );
     Loader::register($test_paths);

     $settings = Config::getInstance();
     // Caching is disabled for tests.
     $settings->setValue('cache_pages', false);
     // Tests assume profiling is off.
     $settings->setValue('enable_profiler', false);
     if ($settings->getValue('timezone')) {
         date_default_timezone_set($settings->getValue('timezone'));
     }

     // The returned instances are not used in this method.
     Webapp::getInstance();
     Crawler::getInstance();
 }
 /**
  * Crawl as a logged-in user and check the expansion result stored for links 1 and 2.
  *
  * Link 1 is expected to be expanded without an error; link 2 is expected to stay unexpanded with the
  * "Error expanding URL" message.
  */
 public function testExpandURLsCrawl()
 {
     // Fixture handles are kept in a local for the whole test.
     $fixtures = $this->buildData();
     $this->simulateLogin('*****@*****.**', true);
     Crawler::getInstance()->crawl();

     // The crawler closes the log, so it is requested again here to re-open it.
     Logger::getInstance();

     // link id => array(expected expanded_url, expected error)
     $expectations = array(
         1 => array('http://www.thewashingtonnote.com/archives/2010/04/communications/', ''),
         2 => array('', 'Error expanding URL'),
     );
     $link_dao = DAOFactory::getDAO('LinkDAO');
     foreach ($expectations as $link_id => $expected) {
         $link = $link_dao->getLinkById($link_id);
         $this->assertEqual($link->expanded_url, $expected[0]);
         $this->assertEqual($link->error, $expected[1]);
     }
 }
 /**
  * Prepare the environment for a test.
  *
  * Runs the parent set-up, registers the test directories with the Loader, switches page caching and the
  * profiler off in Config, applies the configured timezone when one is set, calls getInstance() on Webapp
  * and Crawler, sets the DEBUG flag from the TEST_DEBUG environment variable and finally runs the
  * test-environment readiness check.
  */
 public function setUp()
 {
     parent::setUp();

     $test_paths = array(
         THINKUP_ROOT_PATH . 'tests/',
         THINKUP_ROOT_PATH . 'tests/classes/',
         THINKUP_ROOT_PATH . 'tests/fixtures/'
     );
     Loader::register($test_paths);

     $settings = Config::getInstance();
     // Caching is disabled for tests.
     $settings->setValue('cache_pages', false);
     // Tests assume profiling is off.
     $settings->setValue('enable_profiler', false);
     if ($settings->getValue('timezone')) {
         date_default_timezone_set($settings->getValue('timezone'));
     }

     // The returned instances are not used in this method.
     Webapp::getInstance();
     Crawler::getInstance();

     // getenv() returns false when the variable is not set; anything else turns debugging on.
     $this->DEBUG = getenv('TEST_DEBUG') !== false;
     self::isTestEnvironmentReady();
 }
 /**
  * Run a crawl and send its result to the client.
  *
  * API calls get a JSON content type and, after the crawl, a JSON object with result "success". Other
  * requests get the crawler.run-top.tpl view, the crawl log echoed to the page, and the
  * crawler.run-bottom.tpl view. Passing log=full in the query string selects full log verbosity.
  *
  * @throws CrawlerLockedException Rethrown for API calls; for other requests its message is echoed instead.
  */
 public function authControl()
 {
     Utils::defineConstants();
     if (!$this->isAPICall()) {
         $this->setContentType('text/html; charset=UTF-8');
         $this->setViewTemplate('crawler.run-top.tpl');
         echo $this->generateView();
         // Setting log_location to false forces log output to just echo to the page.
         Config::getInstance()->setValue('log_location', false);
         Logger::getInstance()->close();
     } else {
         // If the request comes from an API call, output JSON instead of HTML.
         $this->setContentType('application/json; charset=UTF-8');
     }

     try {
         $log = Logger::getInstance();
         $wants_full_log = isset($_GET['log']) && $_GET['log'] == 'full';
         if ($wants_full_log) {
             $log->setVerbosity(Logger::ALL_MSGS);
             echo '<pre style="font-family:Courier;font-size:10px;">';
         } else {
             $log->setVerbosity(Logger::USER_MSGS);
             $log->enableHTMLOutput();
         }
         $crawl_runner = Crawler::getInstance();
         // Close the session so that it is not locked by a long crawl.
         session_write_close();
         $crawl_runner->crawl();
         $log->close();
     } catch (CrawlerLockedException $locked) {
         if (!$this->isAPICall()) {
             // Shown as part of the HTML page.
             echo $locked->getMessage();
         } else {
             // Will be caught and handled in ThinkUpController::go()
             throw $locked;
         }
     }

     if (!$this->isAPICall()) {
         $this->setViewTemplate('crawler.run-bottom.tpl');
         echo $this->generateView();
     } else {
         echo json_encode((object) array('result' => 'success'));
     }
 }
 /**
  * Crawl with a placeholder Flickr API key set in Config and check the stored links.
  *
  * Link 43 is expected to be expanded to a static Flickr image URL with no error; links 42 and 41 are
  * expected to stay unexpanded and carry the "No response from Flickr API" error.
  */
 function testFlickrCrawl()
 {
     $crawl_runner = Crawler::getInstance();
     // Use a fake Flickr API key.
     Config::getInstance()->setValue('flickr_api_key', 'dummykey');
     $crawl_runner->crawl();

     $link_dao = DAOFactory::getDAO('LinkDAO');

     $expanded = $link_dao->getLinkById(43);
     $this->assertEqual($expanded->expanded_url, 'http://farm3.static.flickr.com/2755/4488149974_04d9558212_m.jpg');
     $this->assertEqual($expanded->error, '');

     // Both remaining links share the same expectations.
     foreach (array(42, 41) as $link_id) {
         $failed = $link_dao->getLinkById($link_id);
         $this->assertEqual($failed->expanded_url, '');
         $this->assertEqual($failed->error, 'No response from Flickr API');
     }
 }
 /**
  * Run a crawl and send its result to the client.
  *
  * API calls get a JSON content type and, after the crawl, a JSON object with result "success". Other
  * requests get the crawler.run-top.tpl view (with a success message and a hint on automating crawls through
  * the RSS feed or a cron job), a textarea that receives the crawl log, and the crawler.run-bottom.tpl view.
  *
  * @throws CrawlerLockedException Rethrown for API calls; for other requests its message is echoed instead.
  */
 public function authControl()
 {
     Utils::defineConstants();
     if (!$this->isAPICall()) {
         $this->setPageTitle("ThinkUp Crawler");
         $this->setViewTemplate('crawler.run-top.tpl');

         // Fall back to a bare "php" when `which php` yields nothing.
         $located_php = exec('which php');
         if (empty($located_php)) {
             $php_binary = 'php';
         } else {
             $php_binary = $located_php;
         }

         $this->addSuccessMessage(
             'ThinkUp has just started to collect your posts. This is going to take a little '
             . 'while, but if you want to see the technical details of what\'s going on, there\'s a log below. '
         );

         $feed_url = THINKUP_BASE_URL . 'rss.php?'
             . ThinkUpAuthAPIController::getAuthParameters($this->getLoggedInUser());
         $hint = '<b>Hint</b><br />You can automate ThinkUp crawls by subscribing to '
             . '<strong><a href="' . $feed_url . '" target="_blank">this RSS feed</a></strong> '
             . 'in your favorite RSS reader.<br /><br /> Alternately, use the command below to set up a cron job that '
             . 'runs hourly to update your posts. (Be sure to change yourpassword to your real password!)<br /><br />'
             . '<code style="font-family:Courier">cd ' . THINKUP_WEBAPP_PATH
             . 'crawler/;export THINKUP_PASSWORD=yourpassword; '
             . $php_binary . ' crawl.php ' . $this->getLoggedInUser() . '</code>';
         $this->addInfoMessage($hint);

         echo $this->generateView();
         echo '<br /><br /><textarea rows="65" cols="110">';

         // Setting log_location to false forces log output to just echo to the page.
         Config::getInstance()->setValue('log_location', false);
         Logger::getInstance()->close();

         // Makes sure any exception caught below appears as plain text, and not as HTML.
         $this->setContentType('text/plain; charset=UTF-8');
     } else {
         // If the request comes from an API call, output JSON instead of HTML.
         $this->setContentType('application/json; charset=UTF-8');
     }

     try {
         Crawler::getInstance()->crawl();
     } catch (CrawlerLockedException $locked) {
         if (!$this->isAPICall()) {
             // Will appear in the textarea of the HTML page.
             echo $locked->getMessage();
         } else {
             // Will be caught and handled in ThinkUpController::go()
             throw $locked;
         }
     }

     if (!$this->isAPICall()) {
         echo '</textarea>';
         $this->setViewTemplate('crawler.run-bottom.tpl');
         echo $this->generateView();
     } else {
         echo json_encode((object) array('result' => 'success'));
     }
 }
Exemple #14
0
Author: Gina Trapani
*/
/**
 *
 * ThinkUp/webapp/plugins/expandurls/controller/expandurls.php
 *
 * Copyright (c) 2009-2011 Gina Trapani
 *
 * LICENSE:
 *
 * This file is part of ThinkUp (http://thinkupapp.com).
 *
 * ThinkUp is free software: you can redistribute it and/or modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any
 * later version.
 *
 * ThinkUp is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along with ThinkUp.  If not, see
 * <http://www.gnu.org/licenses/>.
 *
 * @author Gina Trapani <ginatrapani[at]gmail[dot]com>
 * @license http://www.gnu.org/licenses/gpl.html
 * @copyright 2009-2011 Gina Trapani
 */
// Register ExpandURLsPlugin with the web application under the 'expandurls' name.
$webapp = Webapp::getInstance();
$webapp->registerPlugin('expandurls', 'ExpandURLsPlugin');
// Register the same class as a crawler plugin
// (presumably so that Crawler::crawl() invokes it - confirm in Crawler).
$crawler = Crawler::getInstance();
$crawler->registerCrawlerPlugin('ExpandURLsPlugin');
 /**
  * Crawl as a logged-in user and check the geoencoding results stored on the fixture posts.
  *
  * Each section fetches one post by id and asserts its geo, place, location, is_geo_encoded and, where
  * relevant, reply_retweet_distance values. The final section checks the rows returned by
  * LocationDAO::getAllLocations().
  */
 function testGeoEncoderCrawl()
 {
     // Fixture handles are kept in a local for the whole test.
     $fixtures = $this->buildData();
     $this->simulateLogin('*****@*****.**', true);
     Crawler::getInstance()->crawl();

     // The crawler closes the log, so it is requested again here to re-open it.
     Logger::getInstance();

     $post_dao = DAOFactory::getDAO('PostDAO');
     $location_dao = DAOFactory::getDAO('LocationDAO');

     // Reverse geoencoding, case 1: successful
     $this->assertTrue($post_dao->isPostInDB(15645300636.0, 'twitter'));
     $post = $post_dao->getPost(15645300636.0, 'twitter');
     $this->assertEqual($post->is_geo_encoded, 1);
     $this->assertEqual($post->geo, '28.602815,77.049136');
     $this->assertEqual($post->location, 'Sector 4, New Delhi, Haryana, India');
     // This is just a post, so reply_retweet_distance is 0
     $this->assertEqual($post->reply_retweet_distance, 0);

     // Reverse geoencoding, case 2: successful
     $post = $post_dao->getPost(15219161227.0, 'twitter');
     $this->assertEqual($post->is_geo_encoded, 1);
     $this->assertEqual($post->geo, '28.56213,77.165297');
     $this->assertEqual($post->location, 'Vasant Vihar, Munirka, New Delhi, Delhi, India');

     // Unsuccessful geoencoding resulting from INVALID_REQUEST.
     // NOTE: Not a test case encountered in actual crawl
     $post = $post_dao->getPost(15331235880.0, 'twitter');
     $this->assertEqual($post->is_geo_encoded, 5);
     $this->assertEqual($post->geo, '28.60abc2815 77.049136');

     // Geoencoding from the "place" field, case 1: successful
     $post = $post_dao->getPost(15052338902.0, 'twitter');
     $this->assertEqual($post->is_geo_encoded, 1);
     $this->assertEqual($post->geo, '28.6889398,77.1618859');
     $this->assertEqual($post->place, 'Sector 8, R.K. Puram, New Delhi');
     $this->assertEqual($post->location, 'Keshav Puram Metro Station, Maharaja Nahar Singh Marg, New Delhi, Delhi, India');
     // Distance between main post and its reply (geocoding process)
     $this->assertEqual($post->reply_retweet_distance, 1161);

     // Geoencoding from the "place" field, case 2: successful
     // This post is retrieved from tu_encoded_locations
     $post = $post_dao->getPost(14914043658.0, 'twitter');
     $this->assertEqual($post->is_geo_encoded, 1);
     $this->assertEqual($post->place, 'Sector 8, R.K. Puram, New Delhi');
     $this->assertEqual($post->location, 'Keshav Puram Metro Station, Maharaja Nahar Singh Marg, New Delhi, Delhi, India');
     // When the reply is not in the DB, reply_retweet_distance is -1
     $this->assertFalse($post_dao->isPostInDB(999999, 'twitter'));
     $this->assertEqual($post->reply_retweet_distance, -1);

     // Geoencoding from the "location" field, case 1: successful (post had is_geo_encoded set to 3)
     $post = $post_dao->getPost(15338041815.0, 'twitter');
     $this->assertEqual($post->geo, '19.017656,72.856178');
     $this->assertEqual($post->place, null);
     $this->assertEqual($post->location, 'Mumbai, Maharashtra, India');
     $this->assertEqual($post->is_geo_encoded, 1);

     // Geoencoding from the "location" field, case 2: successful
     $post = $post_dao->getPost(15344199472.0, 'twitter');
     $this->assertEqual($post->location, 'New Delhi, Delhi, India');
     $this->assertEqual($post->is_geo_encoded, 1);
     // Distance between post and retweet (geocoding process)
     $this->assertEqual($post->reply_retweet_distance, 18);

     // With all three fields filled, <geo> takes precedence
     $post = $post_dao->getPost(11259110570.0, 'twitter');
     $this->assertEqual($post->geo, '28.56213,77.165297');
     $this->assertEqual($post->place, 'Sector 8, R.K. Puram, New Delhi');
     $this->assertEqual($post->location, 'Vasant Vihar, Munirka, New Delhi, Delhi, India');
     $this->assertEqual($post->is_geo_encoded, 1);
     // Distance between reply and post (reverse geocoding process)
     $this->assertEqual($post->reply_retweet_distance, 14);

     // With only place and location filled, <place> takes precedence
     $post = $post_dao->getPost(15052338902.0, 'twitter');
     $this->assertEqual($post->geo, '28.6889398,77.1618859');
     $this->assertEqual($post->place, 'Sector 8, R.K. Puram, New Delhi');
     $this->assertEqual($post->location, 'Keshav Puram Metro Station, Maharaja Nahar Singh Marg, New Delhi, Delhi, India');
     $this->assertEqual($post->is_geo_encoded, 1);

     // Unsuccessful geoencoding due to the place field
     // NOTE: Not a test case encountered in real crawl
     $post = $post_dao->getPost(14913946516.0, 'twitter');
     $this->assertEqual($post->geo, null);
     $this->assertEqual($post->place, 'abc');
     $this->assertEqual($post->location, 'New Delhi');
     $this->assertEqual($post->is_geo_encoded, 2);
     $this->assertEqual($post->reply_retweet_distance, 0);

     // Unsuccessful geoencoding due to the location field
     $post = $post_dao->getPost(15268690400.0, 'twitter');
     $this->assertEqual($post->geo, null);
     $this->assertEqual($post->place, null);
     $this->assertEqual($post->location, 'abc');
     $this->assertEqual($post->is_geo_encoded, 2);

     // Unsuccessful geoencoding due to the location field, resulting in INVALID_REQUEST
     $post = $post_dao->getPost(15244973830.0, 'twitter');
     $this->assertEqual($post->location, 'Ü');
     $this->assertEqual($post->is_geo_encoded, 5);

     // Unsuccessful geoencoding because all three fields are empty
     $post = $post_dao->getPost(15435434230.0, 'twitter');
     $this->assertEqual($post->geo, null);
     $this->assertEqual($post->place, null);
     $this->assertEqual($post->location, null);
     $this->assertEqual($post->is_geo_encoded, 6);

     // Reverse geoencoding when latitude and longitude are found in the location field instead of geo
     $post = $post_dao->getPost(13212618909.0, 'twitter');
     $this->assertEqual($post->geo, '40.681839,-73.983734');
     $this->assertEqual($post->place, null);
     $this->assertEqual($post->location, 'Boerum Hill, Brooklyn, NY, USA');
     $this->assertEqual($post->is_geo_encoded, 1);
     // Retweet distance in case of the reverse geocoding process
     $this->assertEqual($post->reply_retweet_distance, 11760);

     // Unsuccessful geoencoding due to REQUEST_DENIED
     $post = $post_dao->getPost(12259110570.0, 'twitter');
     $this->assertEqual($post->place, 'request_denied');
     $this->assertEqual($post->is_geo_encoded, 4);

     // Unsuccessful geoencoding due to OVER_QUERY_LIMIT
     $post = $post_dao->getPost(13259110570.0, 'twitter');
     $this->assertEqual($post->place, 'over_query_limit');
     $this->assertEqual($post->is_geo_encoded, 3);

     // After reaching OVER_QUERY_LIMIT, the next posts are not geoencoded
     $post = $post_dao->getPost(15645301636.0, 'twitter');
     $this->assertEqual($post->is_geo_encoded, 0);
     $post = $post_dao->getPost(11331235880.0, 'twitter');
     $this->assertEqual($post->is_geo_encoded, 0);

     // Check how the tu_encoded_locations table was filled
     $stored_locations = $location_dao->getAllLocations();
     $this->assertEqual(count($stored_locations), 6);
     $this->assertEqual($stored_locations[0]['short_name'], "28.602815 77.049136");
     $this->assertEqual($stored_locations[2]['short_name'], "Mumbai");
     $this->assertEqual($stored_locations[5]['short_name'], "40.681839 -73.983734");
 }
 /**
  * Crawl with the expandurls plugin fixture in place and check the Instagram links.
  *
  * Link 43 is only checked for an empty error; links 42 and 41 are checked for their expanded media URLs.
  */
 public function testExpandInstagramImageURLs()
 {
     // Fixture handles are kept in locals for the whole test.
     $fixtures = $this->buildInstagramData();
     $crawl_runner = Crawler::getInstance();
     Config::getInstance();
     $plugin_fixture = FixtureBuilder::build('plugins', array('id' => '2', 'folder_name' => 'expandurls'));
     $option_fixture = FixtureBuilder::build('options', array(
         'namespace' => OptionDAO::PLUGIN_OPTIONS . '-2',
         'option_name' => 'flickr_api_key',
         'option_value' => 'dummykey'
     ));

     $this->simulateLogin('*****@*****.**', true);
     $crawl_runner->crawl();

     $dao = DAOFactory::getDAO('LinkDAO');

     // Instagr.am constantly changes the location of their images, so the expanded URL of link 43 is
     // unpredictable and deliberately not asserted; only the error field is checked.
     $first = $dao->getLinkById(43);
     $this->assertEqual($first->error, '');

     $second = $dao->getLinkById(42);
     $this->assertEqual($second->expanded_url, 'http://instagr.am/41/media/');

     $third = $dao->getLinkById(41);
     $this->assertEqual($third->expanded_url, 'http://instagr.am/40/media/');
 }
Exemple #17
0
 /**
  * Crawling while the application version is ahead is expected to raise an InstallerException.
  *
  * The THINKUP_VERSION config value is raised by 10 for the duration of the crawl attempt and put back
  * afterwards, whether or not crawl() throws.
  */
 public function testCrawlUpgrading()
 {
     // up app version
     $config = Config::getInstance();
     $init_db_version = $config->getValue('THINKUP_VERSION');
     // set a high version num
     $config->setValue('THINKUP_VERSION', $init_db_version + 10);

     // Fixture handles are kept in a local for the whole test.
     $builders = $this->buildData();
     $crawler = Crawler::getInstance();
     $crawler->registerPlugin('hellothinkup', 'HelloThinkUpPlugin');
     $crawler->registerCrawlerPlugin('HelloThinkUpPlugin');
     $this->simulateLogin('*****@*****.**', true);
     $this->expectException(new InstallerException('ThinkUp needs a database migration, so we are unable to run the crawler.'));

     try {
         $crawler->crawl();
     } catch (Exception $e) {
         // crawl() is expected to throw, which skips every statement placed after the call. Restore the
         // version here and rethrow so the test framework still receives the expected exception.
         $config->setValue('THINKUP_VERSION', $init_db_version);
         throw $e;
     }

     // reset version (reached only when crawl() did not throw)
     $config->setValue('THINKUP_VERSION', $init_db_version);
 }
 /**
  * Crawl with placeholder Bitly credentials and check the stored links.
  *
  * Link 43 is expected to be expanded with a title and no error; links 42 and 41 are expected to carry the
  * "No response from http://bit.ly API" error alongside the expanded URLs asserted below.
  */
 public function testBitlyCrawl()
 {
     // All fixture handles are collected in $builders and kept for the whole test.
     $builders = $this->buildBitlyData();
     $crawler = Crawler::getInstance();
     Config::getInstance();

     //use fake Bitly API key
     $builders[] = FixtureBuilder::build('options', array(
         'namespace' => OptionDAO::PLUGIN_OPTIONS . '-4',
         'option_name' => 'bitly_api_key',
         'option_value' => 'dummykey'
     ));
     //use fake Bitly login name
     // Appended to $builders as well; this handle used to go into a separate, misspelled $builder array.
     $builders[] = FixtureBuilder::build('options', array(
         'namespace' => OptionDAO::PLUGIN_OPTIONS . '-4',
         'option_name' => 'bitly_login',
         'option_value' => 'bitly123'
     ));

     $this->simulateLogin('*****@*****.**', true);
     $crawler->crawl();

     $link_dao = DAOFactory::getDAO('LinkDAO');

     $link = $link_dao->getLinkById(43);
     $this->assertEqual($link->expanded_url, 'http://static.ak.fbcdn.net/rsrc.php/zw/r/ZEKh4ZZQY74.png');
     $this->assertEqual($link->title, 'Bitly Test URL');
     $this->assertEqual($link->error, '');

     $link = $link_dao->getLinkById(42);
     $this->assertEqual($link->expanded_url, 'http://bitly.com/a/warning?url=http%3a%2f%2fwww%2ealideas%2ecom%2f&hash=41');
     $this->assertEqual($link->error, 'No response from http://bit.ly API');

     $link = $link_dao->getLinkById(41);
     $this->debug($link->url);
     $this->assertEqual($link->expanded_url, 'http://static.ak.fbcdn.net/rsrc.php/zw/r/ZEKh4ZZQY74.png');
     $this->assertEqual($link->error, 'No response from http://bit.ly API');
 }