Ejemplo n.º 1
0
 /**
  * Launch the crawler if the freshest instance's crawler_last_run date is older than X minutes, then return a
  * valid RSS feed.
  *
  * X is the rss_crawler_refresh_rate config value, or 20 minutes when that value is empty. The crawl is
  * requested over HTTP from run.php using the logged-in user's auth parameters. When the crawl is considered
  * launched, an item announcing it is placed first in the feed, ahead of getAdditionalItems().
  *
  * @return string rendered view markup
  */
 public function authControl()
 {
     Utils::defineConstants();
     $this->setContentType('application/rss+xml; charset=UTF-8');
     $this->setViewTemplate('rss.tpl');

     $config = Config::getInstance();
     $rss_crawler_refresh_rate = $config->getValue('rss_crawler_refresh_rate');
     if (empty($rss_crawler_refresh_rate)) {
         $rss_crawler_refresh_rate = 20; // minutes
     }

     // Some servers set $_SERVER['HTTPS'] to "off" for plain HTTP requests, so isset() alone is not enough.
     $is_https = !empty($_SERVER['HTTPS']) && $_SERVER['HTTPS'] !== 'off';
     $protocol = $is_https ? 'https' : 'http';
     $base_url = "{$protocol}://" . $_SERVER['HTTP_HOST'] . THINKUP_BASE_URL;

     $crawler_launched = false;
     $instance_dao = DAOFactory::getDAO('InstanceDAO');
     $freshest_instance = $instance_dao->getInstanceFreshestOne();

     // Without any instance there is no last-run date to compare against, so no crawl is requested.
     $crawl_is_due = false;
     if ($freshest_instance) {
         $crawler_last_run = strtotime((string) $freshest_instance->crawler_last_run);
         // A missing or unparsable date is treated like a very old crawl.
         $crawl_is_due = $crawler_last_run === false
             || $crawler_last_run < time() - $rss_crawler_refresh_rate * 60;
     }

     if ($crawl_is_due) {
         $crawler_run_url = $base_url . 'run.php?'
             . ThinkUpAuthAPIController::getAuthParameters($this->getLoggedInUser());
         $ch = curl_init();
         curl_setopt($ch, CURLOPT_URL, $crawler_run_url);
         curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5); // seconds
         curl_setopt($ch, CURLOPT_TIMEOUT, 5); // seconds
         curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
         curl_setopt($ch, CURLOPT_HEADER, true);
         // NOTE(review): a failed or timed-out request yields an empty response, which ends up in the
         // "launched" branch below. Presumably deliberate because a crawl outlasts the 5s timeout; confirm.
         $result = (string) curl_exec($ch);
         curl_close($ch);

         // Split the raw response into its header block and body; with no separator, treat it all as body.
         $separator_pos = strpos($result, "\r\n\r\n");
         if ($separator_pos === false) {
             $headers = '';
             $body = $result;
         } else {
             $headers = substr($result, 0, $separator_pos);
             $body = (string) substr($result, $separator_pos + 4);
         }

         // Header names are case-insensitive (and lowercase over HTTP/2), hence stripos().
         $is_json = stripos($headers, 'Content-Type: application/json') !== false;
         if ($is_json && function_exists('json_decode')) {
             $json = json_decode($body);
             // Only an explicit success result without an error counts as a launch.
             $crawler_launched = !isset($json->error)
                 && isset($json->result)
                 && $json->result === 'success';
         } else {
             $crawler_launched = strpos($body, 'Error starting crawler') === false;
         }
     }

     $items = array();
     if ($crawler_launched) {
         $title = 'ThinkUp crawl started on ' . date('Y-m-d H:i:s');
         $link = $base_url . 'rss.php?d=' . urlencode(date('Y-m-d H:i:s'));
         $description = "Last ThinkUp crawl ended on {$freshest_instance->crawler_last_run}<br />A new crawl " . "was started just now, since it's been more than {$rss_crawler_refresh_rate} minutes since the last run.";
         $items[] = self::createRSSItem($title, $link, $description);
     }
     $items = array_merge($items, $this->getAdditionalItems($base_url));

     $this->addToView('items', $items);
     $this->addToView('logged_in_user', htmlspecialchars($this->getLoggedInUser()));
     $this->addToView('rss_crawler_refresh_rate', htmlspecialchars($rss_crawler_refresh_rate));
     return $this->generateView();
 }
Ejemplo n.º 2
0
 /**
  * Run the crawler and write its progress straight to the response.
  *
  * For an API call the response is JSON: a success object is echoed once the crawl finishes, and a
  * CrawlerLockedException is rethrown for the caller to handle. Otherwise an HTML page is echoed in three
  * parts: the top template with hint messages, a textarea that receives the crawler log, and the bottom
  * template.
  *
  * @throws CrawlerLockedException only for API calls
  */
 public function authControl()
 {
     Utils::defineConstants();
     if ($this->isAPICall()) {
         // If the request comes from an API call, output JSON instead of HTML
         $this->setContentType('application/json; charset=UTF-8');
     } else {
         $this->setPageTitle("ThinkUp Crawler");
         $this->setViewTemplate('crawler.run-top.tpl');

         // Silence exec() so a missing `which` binary cannot leak a warning into the page.
         $whichphp = @exec('which php');
         $php_path = !empty($whichphp) ? $whichphp : 'php';

         $this->addSuccessMessage('ThinkUp has just started to collect your posts. This is going to take a little ' . 'while, but if you want to see the technical details of what\'s going on, there\'s a log below. ');

         $email = $this->getLoggedInUser();
         $rss_url = THINKUP_BASE_URL . 'rss.php?' . ThinkUpAuthAPIController::getAuthParameters($email);

         // Every dynamic value is escaped before it is concatenated into the HTML hint.
         $rss_url_html = htmlspecialchars($rss_url, ENT_QUOTES, 'UTF-8');
         $webapp_path_html = htmlspecialchars(THINKUP_WEBAPP_PATH, ENT_QUOTES, 'UTF-8');
         $php_path_html = htmlspecialchars($php_path, ENT_QUOTES, 'UTF-8');
         $email_html = htmlspecialchars($email, ENT_QUOTES, 'UTF-8');

         $this->addInfoMessage('<b>Hint</b><br />You can automate ThinkUp crawls by subscribing to '
             . '<strong><a href="' . $rss_url_html . '" target="_blank">this RSS feed</a></strong> '
             . 'in your favorite RSS reader.<br /><br /> Alternately, use the command below to set up a cron job that '
             . 'runs hourly to update your posts. (Be sure to change yourpassword to your real password!)<br /><br />'
             . '<code style="font-family:Courier">cd ' . $webapp_path_html
             . 'crawler/;export THINKUP_PASSWORD=yourpassword; ' . $php_path_html . ' crawl.php ' . $email_html
             . '</code>');
         echo $this->generateView();
         echo '<br /><br /><textarea rows="65" cols="110">';

         // This forces log output to just echo to the page
         $config = Config::getInstance();
         $config->setValue('log_location', false);
         $logger = Logger::getInstance();
         $logger->close();

         // Will make sure any exception caught below appears as plain text, and not as HTML
         $this->setContentType('text/plain; charset=UTF-8');
     }

     try {
         $crawler = Crawler::getInstance();
         $crawler->crawl();
     } catch (CrawlerLockedException $e) {
         if ($this->isAPICall()) {
             // Will be caught and handled in ThinkUpController::go()
             throw $e;
         }
         // Shown inside the textarea; escaped so the message cannot close the element early.
         echo htmlspecialchars($e->getMessage(), ENT_QUOTES, 'UTF-8');
     }

     if ($this->isAPICall()) {
         echo json_encode((object) array('result' => 'success'));
     } else {
         echo '</textarea>';
         $this->setViewTemplate('crawler.run-bottom.tpl');
         echo $this->generateView();
     }
 }
 /**
  * Render the "update now" crawler page with a hint on automating crawls.
  *
  * The hint links to the RSS feed built from the logged-in user's auth parameters and shows a cron command.
  * When the request carries log=full, the view receives a 'log' value of 'full'.
  *
  * @return string rendered view markup
  */
 public function authControl() {
     Utils::defineConstants();
     $this->setContentType('text/html; charset=UTF-8');
     $this->setPageTitle("ThinkUp Crawler");
     $this->setViewTemplate('crawler.updatenow.tpl');

     $whichphp = @exec('which php');
     $php_path = (!empty($whichphp)) ? $whichphp : 'php';

     $email = $this->getLoggedInUser();
     $rss_url = THINKUP_BASE_URL.'rss.php?'.ThinkUpAuthAPIController::getAuthParameters($email);

     // Every dynamic value is escaped before it is concatenated into the HTML hint.
     $rss_url_html = htmlspecialchars($rss_url, ENT_QUOTES, 'UTF-8');
     $webapp_path_html = htmlspecialchars(THINKUP_WEBAPP_PATH, ENT_QUOTES, 'UTF-8');
     $php_path_html = htmlspecialchars($php_path, ENT_QUOTES, 'UTF-8');
     $email_html = htmlspecialchars($email, ENT_QUOTES, 'UTF-8');

     $this->addInfoMessage('<b>Hint</b><br />You can automate ThinkUp crawls by subscribing to '.
         '<strong><a href="'.$rss_url_html.'" target="_blank">this RSS feed</a></strong> '.
         'in your favorite RSS reader.<br /><br /> Alternately, use the command below to set up a cron job that '.
         'runs hourly to update your posts. (Be sure to change yourpassword to your real password!)<br /><br />'.
         '<code style="font-family:Courier">cd '.$webapp_path_html.
         'crawler/;export THINKUP_PASSWORD=yourpassword; '.$php_path_html.' crawl.php '.$email_html.
         '</code>');

     // Strict comparison: only the exact string 'full' switches the view to the full log.
     if (isset($_GET['log']) && $_GET['log'] === 'full') {
         $this->addToView('log', 'full');
     }
     return $this->generateView();
 }
Ejemplo n.º 4
0
 /**
  * Render the "update now" crawler page with a hint on automating crawls.
  *
  * The hint links to the user's secret RSS feed (email plus API key), offers a link to reset that URL, and
  * shows a cron command. When the request carries log=full, the view receives a 'log' value of 'full'.
  *
  * @return string rendered view markup
  */
 public function authControl()
 {
     // We don't want to cache the RSS link with the API key, as the key can get updated
     $this->disableCaching();
     Utils::defineConstants();
     $this->setContentType('text/html; charset=UTF-8');
     $this->setPageTitle("ThinkUp Crawler");
     $this->setViewTemplate('crawler.updatenow.tpl');

     $whichphp = @exec('which php');
     $php_path = !empty($whichphp) ? $whichphp : 'php';

     $email = $this->getLoggedInUser();
     $owner = parent::getOwner($email);
     // Both query values are URL-encoded so reserved characters cannot alter the query string.
     $rss_url = THINKUP_BASE_URL . sprintf('rss.php?un=%s&as=%s', urlencode($email), urlencode($owner->api_key));

     $config = Config::getInstance();
     $site_root_path = $config->getValue('site_root_path');

     // Every dynamic value is escaped before it is concatenated into the HTML hint.
     $rss_url_html = htmlspecialchars($rss_url, ENT_QUOTES, 'UTF-8');
     $site_root_path_html = htmlspecialchars($site_root_path, ENT_QUOTES, 'UTF-8');
     $webapp_path_html = htmlspecialchars(THINKUP_WEBAPP_PATH, ENT_QUOTES, 'UTF-8');
     $php_path_html = htmlspecialchars($php_path, ENT_QUOTES, 'UTF-8');
     $email_html = htmlspecialchars($email, ENT_QUOTES, 'UTF-8');

     $this->addInfoMessage('<b>Hint</b><br />You can automate ThinkUp crawls by subscribing to '
         . '<strong><a href="' . $rss_url_html . '" target="_blank">this secret RSS feed</a></strong> '
         . 'in your favorite newsreader. Accidentally share the feed URL? '
         . '<a href="' . $site_root_path_html . 'account/index.php?m=manage#instances">Reset it.</a>'
         . '<br /><br />Alternately, use the command below to set up a cron job that '
         . 'runs hourly to update your posts. (Be sure to change yourpassword to your real password!)<br /><br />'
         . '<code style="font-family:Courier">cd ' . $webapp_path_html
         . 'crawler/;export THINKUP_PASSWORD=yourpassword; ' . $php_path_html . ' crawl.php ' . $email_html
         . '</code><br /><br /><a href="http://thinkupapp.com/docs/userguide/datacapture.html">Learn more about '
         . 'how to update your ThinkUp data</a>.');

     // Strict comparison: only the exact string 'full' switches the view to the full log.
     if (isset($_GET['log']) && $_GET['log'] === 'full') {
         $this->addToView('log', 'full');
     }
     return $this->generateView();
 }
Ejemplo n.º 5
0
 /**
  * getAuthParameters() should return the "un=<url-encoded email>&as=<secret>" query string for the owner
  * created by buildData().
  */
 public function testGetAuthParameters()
 {
     // NOTE(review): $builders is not read again; presumably it keeps the fixture data alive for the
     // duration of the test. Confirm before removing.
     $builders = $this->buildData();
     // The email must match the expected "un=me%40example.com"; an obfuscated placeholder cannot produce it.
     $this->assertEqual(ThinkUpAuthAPIController::getAuthParameters('me@example.com'), 'un=me%40example.com&as=c9089f3c9adaf0186f6ffb1ee8d6501c');
 }
Ejemplo n.º 6
0
 /**
  * Launch the crawler if the owner's freshest instance has a crawler_last_run date older than X minutes, then
  * return a valid RSS feed.
  *
  * X is the rss_crawler_refresh_rate config value, or 20 minutes when that value is empty. The crawl is
  * requested over HTTP from crawler/run.php using the owner's email and API key. An item announcing the
  * crawl is added only when the crawl is considered launched and the logger has no log defined.
  *
  * @return string rendered view markup
  */
 public function authControl()
 {
     $this->setContentType('application/rss+xml; charset=UTF-8');
     $this->setViewTemplate('rss.tpl');

     $config = Config::getInstance();
     $rss_crawler_refresh_rate = $config->getValue('rss_crawler_refresh_rate');
     if (empty($rss_crawler_refresh_rate)) {
         $rss_crawler_refresh_rate = 20; // minutes
     }

     $base_url = Utils::getApplicationURL();
     $crawler_launched = false;
     $instance_dao = DAOFactory::getDAO('InstanceDAO');
     $email = $this->getLoggedInUser();
     $owner = parent::getOwner($email);
     $freshest_instance = $instance_dao->getFreshestByOwnerId($owner->id);

     // Without any instance there is no last-run date to compare against, so no crawl is requested.
     $crawl_is_due = false;
     if ($freshest_instance) {
         $crawler_last_run = strtotime((string) $freshest_instance->crawler_last_run);
         // A missing or unparsable date is treated like a very old crawl.
         $crawl_is_due = $crawler_last_run === false
             || $crawler_last_run < time() - $rss_crawler_refresh_rate * 60;
     }

     if ($crawl_is_due) {
         // URL-encode both values: a raw email (e.g. one containing "+") would be altered by the receiver.
         $crawler_run_url = $base_url . 'crawler/run.php?'
             . sprintf('un=%s&as=%s', urlencode($email), urlencode($owner->api_key));
         $ch = curl_init();
         curl_setopt($ch, CURLOPT_URL, $crawler_run_url);
         curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5); // seconds
         curl_setopt($ch, CURLOPT_TIMEOUT, 5); // seconds
         curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
         curl_setopt($ch, CURLOPT_HEADER, true);
         // NOTE(review): a failed or timed-out request yields an empty response, which ends up in the
         // "launched" branch below. Presumably deliberate because a crawl outlasts the 5s timeout; confirm.
         $result = (string) curl_exec($ch);
         curl_close($ch);

         // Split the raw response into its header block and body; with no separator, treat it all as body.
         $separator_pos = strpos($result, "\r\n\r\n");
         if ($separator_pos === false) {
             $headers = '';
             $body = $result;
         } else {
             $headers = substr($result, 0, $separator_pos);
             $body = (string) substr($result, $separator_pos + 4);
         }

         // Header names are case-insensitive (and lowercase over HTTP/2), hence stripos().
         $is_json = stripos($headers, 'Content-Type: application/json') !== false;
         if ($is_json && function_exists('json_decode')) {
             $json = json_decode($body);
             // Only an explicit success result without an error counts as a launch.
             $crawler_launched = !isset($json->error)
                 && isset($json->result)
                 && $json->result === 'success';
         } else {
             $crawler_launched = strpos($body, 'Error starting crawler') === false;
         }
     }

     $items = array();
     $logger = Logger::getInstance();
     // Don't return an item if there is a crawler log defined;
     // it would just duplicate the information available in that file.
     if ($crawler_launched && !isset($logger->log)) {
         $title = 'ThinkUp crawl started on ' . date('Y-m-d H:i:s');
         $link = $base_url . 'rss.php?d=' . urlencode(date('Y-m-d H:i:s'));
         $description = "Last ThinkUp crawl ended on {$freshest_instance->crawler_last_run}<br />A new crawl " . "was started just now, since it's been more than {$rss_crawler_refresh_rate} minutes since the last run.";
         $items[] = self::createRSSItem($title, $link, $description);
     }
     $items = array_merge($items, $this->getAdditionalItems($base_url));

     $this->addToView('items', $items);
     $this->addToView('logged_in_user', htmlspecialchars($this->getLoggedInUser()));
     $this->addToView('rss_crawler_refresh_rate', htmlspecialchars($rss_crawler_refresh_rate));
     return $this->generateView();
 }
 /**
  * Fetch the owner for an email address, memoizing the result in self::$owner.
  *
  * Once a truthy owner has been cached, it is returned on every later call regardless of the $email
  * argument; the OwnerDAO lookup only happens while the cache is empty.
  *
  * @param str $email Email address
  * @return Owner
  */
 protected static function getOwner($email)
 {
     if (!self::$owner) {
         // Cache miss: look the owner up once and remember it
         $dao = DAOFactory::getDAO('OwnerDAO');
         self::$owner = $dao->getByEmail($email);
     }
     return self::$owner;
 }
Ejemplo n.º 8
0
 /**
  * Launch the crawler if the owner's freshest instance has a crawler_last_run date older than X minutes, then
  * return a valid RSS feed that also lists insights.
  *
  * X is the rss_crawler_refresh_rate config value, or 20 minutes when that value is empty. The crawl is
  * requested over HTTP from crawler/run.php using the owner's email and API key. An item announcing the
  * crawl is added only when the crawl is considered launched and the logger has no log defined. Admins get
  * insights for all instances, other users only for their own; when there are none, a single placeholder
  * item is added instead.
  *
  * @return string rendered view markup
  */
 public function authControl()
 {
     $this->setContentType('application/rss+xml; charset=UTF-8');
     $this->setViewTemplate('rss.tpl');

     $config = Config::getInstance();
     $rss_crawler_refresh_rate = $config->getValue('rss_crawler_refresh_rate');
     if (empty($rss_crawler_refresh_rate)) {
         $rss_crawler_refresh_rate = 20; // minutes
     }

     $base_url = Utils::getApplicationURL();
     $crawler_launched = false;
     $instance_dao = DAOFactory::getDAO('InstanceDAO');
     $email = $this->getLoggedInUser();
     $owner = parent::getOwner($email);
     $freshest_instance = $instance_dao->getFreshestByOwnerId($owner->id);

     // Without any instance there is no last-run date to compare against, so no crawl is requested.
     $crawl_is_due = false;
     if ($freshest_instance) {
         $crawler_last_run = strtotime((string) $freshest_instance->crawler_last_run);
         // A missing or unparsable date is treated like a very old crawl.
         $crawl_is_due = $crawler_last_run === false
             || $crawler_last_run < time() - $rss_crawler_refresh_rate * 60;
     }

     if ($crawl_is_due) {
         // URL-encode both values: a raw email (e.g. one containing "+") would be altered by the receiver.
         $crawler_run_url = $base_url . 'crawler/run.php?'
             . sprintf('un=%s&as=%s', urlencode($email), urlencode($owner->api_key));
         $ch = curl_init();
         curl_setopt($ch, CURLOPT_URL, $crawler_run_url);
         curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5); // seconds
         curl_setopt($ch, CURLOPT_TIMEOUT, 5); // seconds
         curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
         curl_setopt($ch, CURLOPT_HEADER, true);
         // NOTE(review): a failed or timed-out request yields an empty response, which ends up in the
         // "launched" branch below. Presumably deliberate because a crawl outlasts the 5s timeout; confirm.
         $result = (string) curl_exec($ch);
         curl_close($ch);

         // Split the raw response into its header block and body; with no separator, treat it all as body.
         $separator_pos = strpos($result, "\r\n\r\n");
         if ($separator_pos === false) {
             $headers = '';
             $body = $result;
         } else {
             $headers = substr($result, 0, $separator_pos);
             $body = (string) substr($result, $separator_pos + 4);
         }

         // Header names are case-insensitive (and lowercase over HTTP/2), hence stripos().
         $is_json = stripos($headers, 'Content-Type: application/json') !== false;
         if ($is_json && function_exists('json_decode')) {
             $json = json_decode($body);
             // Only an explicit success result without an error counts as a launch.
             $crawler_launched = !isset($json->error)
                 && isset($json->result)
                 && $json->result === 'success';
         } else {
             $crawler_launched = strpos($body, 'Error starting crawler') === false;
         }
     }

     $items = array();
     $logger = Logger::getInstance();
     // Don't return an item if there is a crawler log defined;
     // it would just duplicate the information available in that file.
     if ($crawler_launched && !isset($logger->log)) {
         $title = 'ThinkUp crawl started on ' . date('Y-m-d H:i:s');
         $link = $base_url . 'rss.php?d=' . urlencode(date('Y-m-d H:i:s'));
         $description = "Last ThinkUp crawl ended on {$freshest_instance->crawler_last_run}<br />A new crawl " . "was started just now, since it's been more than {$rss_crawler_refresh_rate} minutes since the last run.";
         $items[] = self::createRSSItem($title, $link, $description);
     }
     $items = array_merge($items, $this->getAdditionalItems($base_url));

     // Add insights to the RSS feed
     $insight_dao = DAOFactory::getDAO('InsightDAO');
     $page_count = InsightStreamController::PAGE_INSIGHTS_COUNT + 1;
     if ($this->isAdmin()) {
         // Show all insights for all service users
         $insights = $insight_dao->getAllInstanceInsights($page_count, 1);
     } else {
         // Show only the service users this owner owns; $owner was already loaded for the same email above
         $insights = $insight_dao->getAllOwnerInstanceInsights($owner->id, $page_count, 1);
     }

     // empty() also covers a null result, which sizeof() would reject
     if (empty($insights)) {
         $title = 'No insights exist on ' . date('Y-m-d H:i:s');
         $link = $base_url . 'rss.php?d=' . urlencode(date('Y-m-d H:i:s'));
         $description = "ThinkUp doesn't have any insights to show you. Check your crawler log to make sure " . "ThinkUp is capturing data.";
         $items[] = self::createRSSItem($title, $link, $description);
     } else {
         foreach ($insights as $insight) {
             $instance = $insight->instance;
             $time = strtotime($insight->date);
             $username_in_title = ($instance->network == 'twitter' ? '@' : '') . $instance->network_username;
             $title = str_replace(':', '', $insight->headline) . " (" . $username_in_title . ")";
             // Every query value is URL-encoded, including the username
             $link = $base_url . '?u=' . urlencode($instance->network_username)
                 . '&n=' . urlencode($instance->network)
                 . '&d=' . urlencode(date('Y-m-d', $time))
                 . '&s=' . urlencode($insight->slug);
             $description = $insight->headline . " " . $insight->text . '<br><a href="' . $link . '">Link</a>';
             $items[] = self::createRSSItem($title, $link, $description, $time);
         }
     }

     $this->addToView('items', $items);
     $this->addToView('logged_in_user', htmlspecialchars($this->getLoggedInUser()));
     $this->addToView('rss_crawler_refresh_rate', htmlspecialchars($rss_crawler_refresh_rate));
     return $this->generateView();
 }
 /**
  * getAuthParameters() should return the "un=<url-encoded email>&as=<secret>" query string for the owner
  * created by buildData().
  */
 public function testGetAuthParameters()
 {
     // NOTE(review): $builders is not read again; presumably it keeps the fixture data alive for the
     // duration of the test. Confirm before removing.
     $builders = $this->buildData();
     // The email must match the expected "un=me%40example.com"; an obfuscated placeholder cannot produce it.
     $this->assertEqual(ThinkUpAuthAPIController::getAuthParameters('me@example.com'), 'un=me%40example.com&as=1829cc1b13f920a05fb201e8d2a9e4dc58b669b1');
 }