Example #1
0
 /**
  * Serve the crawler RSS feed, starting a new crawl first when the last one is stale.
  *
  * Reads the 'rss_crawler_refresh_rate' config value (minutes, defaulting to 20). When the freshest
  * instance's crawler_last_run is older than that, run.php is requested over HTTP with the logged-in
  * user's auth parameters; unless the response reports an error, a "crawl started" item is placed at
  * the top of the feed, followed by whatever getAdditionalItems() returns.
  *
  * @return string rendered view markup
  */
 public function authControl()
 {
     Utils::defineConstants();
     $this->setContentType('application/rss+xml; charset=UTF-8');
     $this->setViewTemplate('rss.tpl');

     $config = Config::getInstance();
     $rss_crawler_refresh_rate = $config->getValue('rss_crawler_refresh_rate');
     if (empty($rss_crawler_refresh_rate)) {
         $rss_crawler_refresh_rate = 20; // minutes
     }

     // $_SERVER['HTTPS'] can be set to 'off' (ISAPI with IIS) for plain HTTP requests, so isset() alone
     // would wrongly pick https there.
     $is_https = !empty($_SERVER['HTTPS']) && strtolower($_SERVER['HTTPS']) !== 'off';
     $protocol = $is_https ? 'https' : 'http';
     $base_url = "{$protocol}://" . $_SERVER['HTTP_HOST'] . THINKUP_BASE_URL;

     $instance_dao = DAOFactory::getDAO('InstanceDAO');
     $freshest_instance = $instance_dao->getInstanceFreshestOne();
     // Guard the dereference in case no instance (or no last-run date) is available.
     $last_run = isset($freshest_instance->crawler_last_run) ? $freshest_instance->crawler_last_run : '';
     // strtotime() yields false for an empty/unparseable date, which compares as "older than the threshold".
     $crawler_last_run = strtotime($last_run);

     $crawler_launched = false;
     if ($crawler_last_run < time() - $rss_crawler_refresh_rate * 60) {
         $crawler_run_url = $base_url . 'run.php?'
             . ThinkUpAuthAPIController::getAuthParameters($this->getLoggedInUser());

         $ch = curl_init();
         curl_setopt($ch, CURLOPT_URL, $crawler_run_url);
         curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5); // seconds
         curl_setopt($ch, CURLOPT_TIMEOUT, 5); // seconds
         curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
         curl_setopt($ch, CURLOPT_HEADER, true);
         $result = curl_exec($ch);
         curl_close($ch);

         // curl_exec() returns false on failure (including the 5 second timeout); treat that as an
         // empty response instead of feeding false into the string functions below.
         // NOTE(review): an empty response still ends up as "launched" (as in the original code);
         // presumably intentional because a crawl outlasts the timeout - confirm.
         $response = is_string($result) ? $result : '';

         // Split headers from body explicitly; without a separator the whole response is the body.
         $separator_pos = strpos($response, "\r\n\r\n");
         if ($separator_pos === false) {
             $headers = '';
             $body = $response;
         } else {
             $headers = substr($response, 0, $separator_pos);
             $body = (string) substr($response, $separator_pos + 4);
         }

         // HTTP header names are case-insensitive, and only the header section should be inspected.
         $is_json = stripos($headers, 'Content-Type: application/json') !== false;
         if ($is_json && function_exists('json_decode')) {
             $json = json_decode($body);
             // Only an explicit success result counts; an error member or any other payload does not.
             $crawler_launched = !isset($json->error)
                 && isset($json->result)
                 && $json->result === 'success';
         } else {
             $crawler_launched = (strpos($body, 'Error starting crawler') === false);
         }
     }

     $items = array();
     if ($crawler_launched) {
         // Take the timestamp once so the item's title and link always agree.
         $now = date('Y-m-d H:i:s');
         $title = 'ThinkUp crawl started on ' . $now;
         $link = $base_url . 'rss.php?d=' . urlencode($now);
         $description = "Last ThinkUp crawl ended on {$last_run}<br />A new crawl "
             . "was started just now, since it's been more than {$rss_crawler_refresh_rate} minutes since the last run.";
         $items[] = self::createRSSItem($title, $link, $description);
     }
     $items = array_merge($items, $this->getAdditionalItems($base_url));

     $this->addToView('items', $items);
     $this->addToView('logged_in_user', htmlspecialchars($this->getLoggedInUser()));
     $this->addToView('rss_crawler_refresh_rate', htmlspecialchars($rss_crawler_refresh_rate));
     return $this->generateView();
 }
 /**
  * Run the crawler, emitting JSON for API calls or a streaming HTML page otherwise.
  *
  * API calls: the content type is set to JSON, a CrawlerLockedException is rethrown to the caller,
  * and a {"result":"success"} object is echoed once the crawl returns.
  * Browser requests: the top template is echoed with a hint message, a <textarea> is opened, the
  * log_location config value is set to false, the crawl runs, and the bottom template is echoed.
  *
  * All output is echoed directly; the method returns nothing.
  */
 public function authControl()
 {
     Utils::defineConstants();
     $is_api_call = $this->isAPICall();

     if ($is_api_call) {
         // If the request comes from an API call, output JSON instead of HTML
         $this->setContentType('application/json; charset=UTF-8');
     } else {
         $this->setPageTitle("ThinkUp Crawler");
         $this->setViewTemplate('crawler.run-top.tpl');

         // exec() may be disabled (calling a disabled function is a fatal Error on PHP 8) or may
         // fail to find "which"; fall back to a bare "php" in either case.
         $whichphp = function_exists('exec') ? @exec('which php') : '';
         $php_path = !empty($whichphp) ? $whichphp : 'php';

         $this->addSuccessMessage('ThinkUp has just started to collect your posts. This is going to take a little ' . 'while, but if you want to see the technical details of what\'s going on, there\'s a log below. ');

         $logged_in_user = $this->getLoggedInUser();
         $rss_url = THINKUP_BASE_URL . 'rss.php?' . ThinkUpAuthAPIController::getAuthParameters($logged_in_user);
         // Both values are dropped into HTML markup: the auth query string contains a raw "&", and the
         // user name is account data, so escape them for the attribute/text context.
         $rss_url_html = htmlspecialchars($rss_url, ENT_QUOTES, 'UTF-8');
         $user_html = htmlspecialchars($logged_in_user, ENT_QUOTES, 'UTF-8');
         $this->addInfoMessage('<b>Hint</b><br />You can automate ThinkUp crawls by subscribing to '
             . '<strong><a href="' . $rss_url_html . '" target="_blank">this RSS feed</a></strong> '
             . 'in your favorite RSS reader.<br /><br /> Alternately, use the command below to set up a cron job that '
             . 'runs hourly to update your posts. (Be sure to change yourpassword to your real password!)<br /><br />'
             . '<code style="font-family:Courier">cd ' . THINKUP_WEBAPP_PATH
             . 'crawler/;export THINKUP_PASSWORD=yourpassword; ' . $php_path . ' crawl.php ' . $user_html
             . '</code>');

         echo $this->generateView();
         echo '<br /><br /><textarea rows="65" cols="110">';

         $config = Config::getInstance();
         // This forces log output to just echo to the page
         $config->setValue('log_location', false);
         $logger = Logger::getInstance();
         $logger->close();

         // Will make sure any exception caught below appears as plain text, and not as HTML
         $this->setContentType('text/plain; charset=UTF-8');
     }

     try {
         $crawler = Crawler::getInstance();
         $crawler->crawl();
     } catch (CrawlerLockedException $e) {
         if ($is_api_call) {
             // Will be caught and handled in ThinkUpController::go()
             throw $e;
         }
         // Will appear in the textarea of the HTML page; escape it so the message cannot close the
         // textarea or inject markup.
         echo htmlspecialchars($e->getMessage(), ENT_QUOTES, 'UTF-8');
     }

     if ($is_api_call) {
         echo json_encode((object) array('result' => 'success'));
     } else {
         echo '</textarea>';
         $this->setViewTemplate('crawler.run-bottom.tpl');
         echo $this->generateView();
     }
 }
 /**
  * Render the "update now" crawler page with a hint on automating crawls.
  *
  * Sets an HTML content type, page title and the crawler.updatenow.tpl template, adds an info message
  * containing the RSS feed link and a sample cron command, and passes log=full through to the view
  * when that exact query parameter is present.
  *
  * @return string rendered view markup
  */
 public function authControl() {
     Utils::defineConstants();
     $this->setContentType('text/html; charset=UTF-8');
     $this->setPageTitle("ThinkUp Crawler");
     $this->setViewTemplate('crawler.updatenow.tpl');

     // The @ operator cannot suppress the fatal Error PHP 8 raises for a disabled function, so check
     // that exec() is callable first; fall back to a bare "php" when the lookup yields nothing.
     $whichphp = function_exists('exec') ? @exec('which php') : '';
     $php_path = (!empty($whichphp)) ? $whichphp : 'php';

     $logged_in_user = $this->getLoggedInUser();
     $rss_url = THINKUP_BASE_URL.'rss.php?'.ThinkUpAuthAPIController::getAuthParameters($logged_in_user);
     // Both values are dropped into HTML markup: the auth query string contains a raw "&", and the
     // user name is account data, so escape them for the attribute/text context.
     $rss_url_html = htmlspecialchars($rss_url, ENT_QUOTES, 'UTF-8');
     $user_html = htmlspecialchars($logged_in_user, ENT_QUOTES, 'UTF-8');

     $this->addInfoMessage('<b>Hint</b><br />You can automate ThinkUp crawls by subscribing to '.
         '<strong><a href="'.$rss_url_html.'" target="_blank">this RSS feed</a></strong> '.
         'in your favorite RSS reader.<br /><br /> Alternately, use the command below to set up a cron job that '.
         'runs hourly to update your posts. (Be sure to change yourpassword to your real password!)<br /><br />'.
         '<code style="font-family:Courier">cd '.THINKUP_WEBAPP_PATH.
         'crawler/;export THINKUP_PASSWORD=yourpassword; '.$php_path.' crawl.php '.$user_html.
         '</code>');

     // Strict comparison: only the literal string "full" enables the full log view.
     if (isset($_GET['log']) && $_GET['log'] === 'full') {
         $this->addToView('log', 'full');
     }
     return $this->generateView();
 }
 /**
  * getAuthParameters() should produce the "un" (URL-encoded user name) and "as" query parameters
  * for the fixture user.
  */
 public function testGetAuthParameters()
 {
     // Held in a local for the duration of the test.
     // NOTE(review): presumably the builders remove their fixture rows when destroyed - confirm.
     $builders = $this->buildData();

     // The input address had been replaced by a redaction placeholder ('*****@*****.**'), which can
     // never encode to the expected "me%40example.com"; restore the address the expectation implies.
     $expected = 'un=me%40example.com&as=c9089f3c9adaf0186f6ffb1ee8d6501c';
     $actual = ThinkUpAuthAPIController::getAuthParameters('me@example.com');

     $this->assertEqual($actual, $expected);
 }
 /**
  * getAuthParameters() should produce the "un" (URL-encoded user name) and "as" query parameters
  * for the fixture user; this variant expects a 40-character "as" value.
  */
 public function testGetAuthParameters()
 {
     // Held in a local for the duration of the test.
     // NOTE(review): presumably the builders remove their fixture rows when destroyed - confirm.
     $builders = $this->buildData();

     // The input address had been replaced by a redaction placeholder ('*****@*****.**'), which can
     // never encode to the expected "me%40example.com"; restore the address the expectation implies.
     $expected = 'un=me%40example.com&as=1829cc1b13f920a05fb201e8d2a9e4dc58b669b1';
     $actual = ThinkUpAuthAPIController::getAuthParameters('me@example.com');

     $this->assertEqual($actual, $expected);
 }