/**
 * Launch the crawler if the freshest instance's crawler_last_run date is older than
 * rss_crawler_refresh_rate minutes (20 when not configured), then return a valid RSS feed.
 *
 * The crawler is started by requesting run.php over HTTP with a short timeout. When a crawl
 * was started, the feed gets one item announcing it, followed by getAdditionalItems().
 *
 * @return string rendered view markup
 */
public function authControl() {
    Utils::defineConstants();
    $this->setContentType('application/rss+xml; charset=UTF-8');
    $this->setViewTemplate('rss.tpl');

    $config = Config::getInstance();
    $rss_crawler_refresh_rate = $config->getValue('rss_crawler_refresh_rate');
    if (empty($rss_crawler_refresh_rate)) {
        $rss_crawler_refresh_rate = 20; // minutes
    }

    // $_SERVER['HTTPS'] is 'off' for plain HTTP requests under IIS/ISAPI, so isset() alone
    // would wrongly produce an https:// base URL there.
    $is_https = !empty($_SERVER['HTTPS']) && strtolower($_SERVER['HTTPS']) !== 'off';
    $protocol = $is_https ? 'https' : 'http';
    $base_url = "{$protocol}://" . $_SERVER['HTTP_HOST'] . THINKUP_BASE_URL;

    $crawler_launched = false;
    $instance_dao = DAOFactory::getDAO('InstanceDAO');
    $freshest_instance = $instance_dao->getInstanceFreshestOne();
    $crawler_last_run = strtotime($freshest_instance->crawler_last_run);

    if ($crawler_last_run < time() - $rss_crawler_refresh_rate * 60) {
        $crawler_run_url = $base_url . 'run.php?' .
            ThinkUpAuthAPIController::getAuthParameters($this->getLoggedInUser());

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $crawler_run_url);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5); // seconds
        curl_setopt($ch, CURLOPT_TIMEOUT, 5); // seconds
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_HEADER, true); // headers are inspected below
        $result = curl_exec($ch);
        curl_close($ch);

        // Everything after the first blank line is the response body.
        $body = substr($result, strpos($result, "\r\n\r\n") + 4);

        if (strpos($result, 'Content-Type: application/json') && function_exists('json_decode')) {
            // JSON reply: only an explicit success result counts as launched.
            $json = json_decode($body);
            if (isset($json->error)) {
                $crawler_launched = false;
            } elseif (isset($json->result) && $json->result == 'success') {
                $crawler_launched = true;
            }
        } else {
            // Non-JSON reply: launched unless the error marker is present. A request that
            // returned nothing (e.g. it hit the 5 second timeout) also ends up here and is
            // treated as launched.
            $crawler_launched = (strpos($body, 'Error starting crawler') === false);
        }
    }

    $items = array();
    if ($crawler_launched) {
        $title = 'ThinkUp crawl started on ' . date('Y-m-d H:i:s');
        $link = $base_url . 'rss.php?d=' . urlencode(date('Y-m-d H:i:s'));
        $description = "Last ThinkUp crawl ended on {$freshest_instance->crawler_last_run}<br />A new crawl " .
            "was started just now, since it's been more than {$rss_crawler_refresh_rate} minutes since the last run.";
        $items[] = self::createRSSItem($title, $link, $description);
    }

    $items = array_merge($items, $this->getAdditionalItems($base_url));

    $this->addToView('items', $items);
    $this->addToView('logged_in_user', htmlspecialchars($this->getLoggedInUser()));
    $this->addToView('rss_crawler_refresh_rate', htmlspecialchars($rss_crawler_refresh_rate));

    return $this->generateView();
}
/**
 * Run the crawler and stream its output.
 *
 * For API calls the response is JSON: {"result":"success"} is echoed after the crawl, and a
 * CrawlerLockedException is re-thrown for the caller to handle. Otherwise an HTML page is
 * echoed in three parts: the top template, a textarea that receives the crawler log (or
 * the lock error message), and the bottom template.
 *
 * Output is echoed directly; nothing is returned.
 */
public function authControl() {
    Utils::defineConstants();

    if ($this->isAPICall()) {
        // If the request comes from an API call, output JSON instead of HTML
        $this->setContentType('application/json; charset=UTF-8');
    } else {
        $this->setPageTitle("ThinkUp Crawler");
        $this->setViewTemplate('crawler.run-top.tpl');

        $whichphp = exec('which php');
        $php_path = !empty($whichphp) ? $whichphp : 'php';

        $this->addSuccessMessage('ThinkUp has just started to collect your posts. This is going to take a little ' .
            'while, but if you want to see the technical details of what\'s going on, there\'s a log below. ');

        $rss_url = THINKUP_BASE_URL . 'rss.php?' .
            ThinkUpAuthAPIController::getAuthParameters($this->getLoggedInUser());

        // The message is raw HTML, so the user's email is escaped before being embedded in it.
        $escaped_user = htmlspecialchars($this->getLoggedInUser());

        $this->addInfoMessage('<b>Hint</b><br />You can automate ThinkUp crawls by subscribing to ' .
            '<strong><a href="' . $rss_url . '" target="_blank">this RSS feed</a></strong> ' .
            'in your favorite RSS reader.<br /><br /> Alternately, use the command below to set up a cron job that ' .
            'runs hourly to update your posts. (Be sure to change yourpassword to your real password!)<br /><br />' .
            '<code style="font-family:Courier">cd ' . THINKUP_WEBAPP_PATH .
            'crawler/;export THINKUP_PASSWORD=yourpassword; ' . $php_path . ' crawl.php ' . $escaped_user .
            '</code>');

        echo $this->generateView();
        echo '<br /><br /><textarea rows="65" cols="110">';

        $config = Config::getInstance();
        $config->setValue('log_location', false); // this forces output to just echo to page
        $logger = Logger::getInstance();
        $logger->close();

        // Makes sure any exception caught below appears as plain text, and not as HTML
        $this->setContentType('text/plain; charset=UTF-8');
    }

    try {
        $crawler = Crawler::getInstance();
        $crawler->crawl();
    } catch (CrawlerLockedException $e) {
        if ($this->isAPICall()) {
            // Will be caught and handled in ThinkUpController::go()
            throw $e;
        } else {
            // Will appear in the textarea of the HTML page
            echo $e->getMessage();
        }
    }

    if ($this->isAPICall()) {
        echo json_encode((object) array('result' => 'success'));
    } else {
        echo '</textarea>';
        $this->setViewTemplate('crawler.run-bottom.tpl');
        echo $this->generateView();
    }
}
/**
 * Render the "update now" page, including a hint on automating crawls via the RSS feed or
 * an hourly cron job. Passing log=full in the query string adds log => 'full' to the view.
 *
 * @return string rendered view markup
 */
public function authControl() {
    Utils::defineConstants();
    $this->setContentType('text/html; charset=UTF-8');
    $this->setPageTitle("ThinkUp Crawler");
    $this->setViewTemplate('crawler.updatenow.tpl');

    // Falls back to a bare "php" when `which php` yields nothing.
    $whichphp = @exec('which php');
    $php_path = (!empty($whichphp)) ? $whichphp : 'php';

    $rss_url = THINKUP_BASE_URL . 'rss.php?' .
        ThinkUpAuthAPIController::getAuthParameters($this->getLoggedInUser());

    // The message is raw HTML, so the user's email is escaped before being embedded in it.
    $escaped_user = htmlspecialchars($this->getLoggedInUser());

    $this->addInfoMessage('<b>Hint</b><br />You can automate ThinkUp crawls by subscribing to ' .
        '<strong><a href="' . $rss_url . '" target="_blank">this RSS feed</a></strong> ' .
        'in your favorite RSS reader.<br /><br /> Alternately, use the command below to set up a cron job that ' .
        'runs hourly to update your posts. (Be sure to change yourpassword to your real password!)<br /><br />' .
        '<code style="font-family:Courier">cd ' . THINKUP_WEBAPP_PATH .
        'crawler/;export THINKUP_PASSWORD=yourpassword; ' . $php_path . ' crawl.php ' . $escaped_user .
        '</code>');

    if (isset($_GET['log']) && $_GET['log'] == 'full') {
        $this->addToView('log', 'full');
    }

    return $this->generateView();
}
/**
 * Render the "update now" page, including a hint on automating crawls via the owner's
 * secret RSS feed (authenticated by email and API key) or an hourly cron job.
 * Passing log=full in the query string adds log => 'full' to the view.
 *
 * @return string rendered view markup
 */
public function authControl() {
    $this->disableCaching(); // we don't want to cache the rss link with api key as it can get updated
    Utils::defineConstants();
    $this->setContentType('text/html; charset=UTF-8');
    $this->setPageTitle("ThinkUp Crawler");
    $this->setViewTemplate('crawler.updatenow.tpl');

    // Falls back to a bare "php" when `which php` yields nothing.
    $whichphp = @exec('which php');
    $php_path = !empty($whichphp) ? $whichphp : 'php';

    $email = $this->getLoggedInUser();
    $owner = parent::getOwner($email);

    // Both query values are URL-encoded so the feed link stays valid whatever they contain.
    $rss_url = THINKUP_BASE_URL .
        sprintf('rss.php?un=%s&as=%s', urlencode($email), urlencode($owner->api_key));

    $config = Config::getInstance();
    $site_root_path = $config->getValue('site_root_path');

    // The message is raw HTML, so the user's email is escaped before being embedded in it.
    $escaped_user = htmlspecialchars($email);

    $this->addInfoMessage('<b>Hint</b><br />You can automate ThinkUp crawls by subscribing to ' .
        '<strong><a href="' . $rss_url . '" target="_blank">this secret RSS feed</a></strong> ' .
        'in your favorite newsreader. Accidentally share the feed URL? ' .
        '<a href="' . $site_root_path . 'account/index.php?m=manage#instances">Reset it.</a>' .
        '<br /><br />Alternately, use the command below to set up a cron job that ' .
        'runs hourly to update your posts. (Be sure to change yourpassword to your real password!)<br /><br />' .
        '<code style="font-family:Courier">cd ' . THINKUP_WEBAPP_PATH .
        'crawler/;export THINKUP_PASSWORD=yourpassword; ' . $php_path . ' crawl.php ' . $escaped_user .
        '</code><br /><br /><a href="http://thinkupapp.com/docs/userguide/datacapture.html">Learn more about ' .
        'how to update your ThinkUp data</a>.');

    if (isset($_GET['log']) && $_GET['log'] == 'full') {
        $this->addToView('log', 'full');
    }

    return $this->generateView();
}
/**
 * getAuthParameters() should return the URL-encoded "un" (email) and "as" (API secret)
 * query string for the fixture owner.
 */
public function testGetAuthParameters() {
    // NOTE(review): presumably the builders must stay referenced for the fixture rows to
    // exist during the assertion - confirm against buildData().
    $builders = $this->buildData();

    // The input must be the address that the expected "un" value encodes; the previous
    // masked literal could not produce un=me%40example.com.
    $auth_parameters = ThinkUpAuthAPIController::getAuthParameters('me@example.com');

    $this->assertEqual($auth_parameters, 'un=me%40example.com&as=c9089f3c9adaf0186f6ffb1ee8d6501c');
}
/**
 * Launch the crawler if the owner's freshest instance was last crawled more than
 * rss_crawler_refresh_rate minutes ago (20 when not configured), then return a valid RSS feed.
 *
 * The crawler is started by requesting crawler/run.php over HTTP with a short timeout,
 * authenticated by the owner's email and API key. No crawl is attempted when the owner has
 * no instance.
 *
 * @return string rendered view markup
 */
public function authControl() {
    $this->setContentType('application/rss+xml; charset=UTF-8');
    $this->setViewTemplate('rss.tpl');

    $config = Config::getInstance();
    $rss_crawler_refresh_rate = $config->getValue('rss_crawler_refresh_rate');
    if (empty($rss_crawler_refresh_rate)) {
        $rss_crawler_refresh_rate = 20; // minutes
    }

    $base_url = Utils::getApplicationURL();
    $crawler_plugin_registrar_launched = false;

    $instance_dao = DAOFactory::getDAO('InstanceDAO');
    $email = $this->getLoggedInUser();
    $owner = parent::getOwner($email);
    $freshest_instance = $instance_dao->getFreshestByOwnerId($owner->id);
    if ($freshest_instance) {
        $crawler_plugin_registrar_last_run = strtotime($freshest_instance->crawler_last_run);
    }

    if ($freshest_instance && $crawler_plugin_registrar_last_run < time() - $rss_crawler_refresh_rate * 60) {
        // Query values are URL-encoded: a raw "+" or "&" in the email would otherwise be
        // misread by the receiving script and break authentication.
        $crawler_plugin_registrar_run_url = $base_url . 'crawler/run.php?' .
            sprintf('un=%s&as=%s', urlencode($email), urlencode($owner->api_key));

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $crawler_plugin_registrar_run_url);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5); // seconds
        curl_setopt($ch, CURLOPT_TIMEOUT, 5); // seconds
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_HEADER, true); // headers are inspected below
        $result = curl_exec($ch);
        curl_close($ch);

        // Everything after the first blank line is the response body.
        $body = substr($result, strpos($result, "\r\n\r\n") + 4);

        if (strpos($result, 'Content-Type: application/json') && function_exists('json_decode')) {
            // JSON reply: only an explicit success result counts as launched.
            $json = json_decode($body);
            if (isset($json->error)) {
                $crawler_plugin_registrar_launched = false;
            } elseif (isset($json->result) && $json->result == 'success') {
                $crawler_plugin_registrar_launched = true;
            }
        } else {
            // Non-JSON reply: launched unless the error marker is present. A request that
            // returned nothing (e.g. it hit the 5 second timeout) also ends up here and is
            // treated as launched.
            $crawler_plugin_registrar_launched = (strpos($body, 'Error starting crawler') === false);
        }
    }

    $items = array();
    $logger = Logger::getInstance();
    // Don't return an item if there is a crawler log defined;
    // it would just duplicate the information available in that file.
    if ($crawler_plugin_registrar_launched && !isset($logger->log)) {
        $title = 'ThinkUp crawl started on ' . date('Y-m-d H:i:s');
        $link = $base_url . 'rss.php?d=' . urlencode(date('Y-m-d H:i:s'));
        $description = "Last ThinkUp crawl ended on {$freshest_instance->crawler_last_run}<br />A new crawl " .
            "was started just now, since it's been more than {$rss_crawler_refresh_rate} minutes since the last run.";
        $items[] = self::createRSSItem($title, $link, $description);
    }

    $items = array_merge($items, $this->getAdditionalItems($base_url));

    $this->addToView('items', $items);
    $this->addToView('logged_in_user', htmlspecialchars($this->getLoggedInUser()));
    $this->addToView('rss_crawler_refresh_rate', htmlspecialchars($rss_crawler_refresh_rate));

    return $this->generateView();
}
/**
 * Look up the owner for an email address, caching the result in self::$owner.
 *
 * The OwnerDAO is only queried while self::$owner is empty; once an owner has been cached
 * it is returned for every later call, whatever $email is passed.
 *
 * @param str $email Email address
 * @return Owner
 */
protected static function getOwner($email) {
    if (!self::$owner) {
        // Nothing cached yet: fetch by email and remember the result.
        self::$owner = DAOFactory::getDAO('OwnerDAO')->getByEmail($email);
    }
    return self::$owner;
}
/**
 * Launch the crawler if the owner's freshest instance was last crawled more than
 * rss_crawler_refresh_rate minutes ago (20 when not configured), then return a valid RSS
 * feed that also lists insights.
 *
 * The crawler is started by requesting crawler/run.php over HTTP with a short timeout,
 * authenticated by the owner's email and API key. Admins get insights for all instances;
 * other users only get insights for the instances they own. When there are no insights, a
 * single placeholder item is added instead.
 *
 * @return string rendered view markup
 */
public function authControl() {
    $this->setContentType('application/rss+xml; charset=UTF-8');
    $this->setViewTemplate('rss.tpl');

    $config = Config::getInstance();
    $rss_crawler_refresh_rate = $config->getValue('rss_crawler_refresh_rate');
    if (empty($rss_crawler_refresh_rate)) {
        $rss_crawler_refresh_rate = 20; // minutes
    }

    $base_url = Utils::getApplicationURL();
    $crawler_plugin_registrar_launched = false;

    $instance_dao = DAOFactory::getDAO('InstanceDAO');
    $email = $this->getLoggedInUser();
    $owner = parent::getOwner($email);
    $freshest_instance = $instance_dao->getFreshestByOwnerId($owner->id);
    if ($freshest_instance) {
        $crawler_plugin_registrar_last_run = strtotime($freshest_instance->crawler_last_run);
    }

    if ($freshest_instance && $crawler_plugin_registrar_last_run < time() - $rss_crawler_refresh_rate * 60) {
        // Query values are URL-encoded: a raw "+" or "&" in the email would otherwise be
        // misread by the receiving script and break authentication.
        $crawler_plugin_registrar_run_url = $base_url . 'crawler/run.php?' .
            sprintf('un=%s&as=%s', urlencode($email), urlencode($owner->api_key));

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $crawler_plugin_registrar_run_url);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5); // seconds
        curl_setopt($ch, CURLOPT_TIMEOUT, 5); // seconds
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_HEADER, true); // headers are inspected below
        $result = curl_exec($ch);
        curl_close($ch);

        // Everything after the first blank line is the response body.
        $body = substr($result, strpos($result, "\r\n\r\n") + 4);

        if (strpos($result, 'Content-Type: application/json') && function_exists('json_decode')) {
            // JSON reply: only an explicit success result counts as launched.
            $json = json_decode($body);
            if (isset($json->error)) {
                $crawler_plugin_registrar_launched = false;
            } elseif (isset($json->result) && $json->result == 'success') {
                $crawler_plugin_registrar_launched = true;
            }
        } else {
            // Non-JSON reply: launched unless the error marker is present. A request that
            // returned nothing (e.g. it hit the 5 second timeout) also ends up here and is
            // treated as launched.
            $crawler_plugin_registrar_launched = (strpos($body, 'Error starting crawler') === false);
        }
    }

    $items = array();
    $logger = Logger::getInstance();
    // Don't return an item if there is a crawler log defined;
    // it would just duplicate the information available in that file.
    if ($crawler_plugin_registrar_launched && !isset($logger->log)) {
        $title = 'ThinkUp crawl started on ' . date('Y-m-d H:i:s');
        $link = $base_url . 'rss.php?d=' . urlencode(date('Y-m-d H:i:s'));
        $description = "Last ThinkUp crawl ended on {$freshest_instance->crawler_last_run}<br />A new crawl " .
            "was started just now, since it's been more than {$rss_crawler_refresh_rate} minutes since the last run.";
        $items[] = self::createRSSItem($title, $link, $description);
    }

    $items = array_merge($items, $this->getAdditionalItems($base_url));

    // Add insights to RSS feed
    $insight_dao = DAOFactory::getDAO('InsightDAO');
    $page_count = InsightStreamController::PAGE_INSIGHTS_COUNT + 1;
    if ($this->isAdmin()) {
        // show all insights for all service users
        $insights = $insight_dao->getAllInstanceInsights($page_count, 1);
    } else {
        // show only service users owner owns
        $owner_dao = DAOFactory::getDAO('OwnerDAO');
        $owner = $owner_dao->getByEmail($this->getLoggedInUser());
        $insights = $insight_dao->getAllOwnerInstanceInsights($owner->id, $page_count, 1);
    }

    // empty() also covers a DAO that returns null instead of an empty array.
    if (empty($insights)) {
        $title = 'No insights exist on ' . date('Y-m-d H:i:s');
        $link = $base_url . 'rss.php?d=' . urlencode(date('Y-m-d H:i:s'));
        $description = "ThinkUp doesn't have any insights to show you. Check your crawler log to make sure " .
            "ThinkUp is capturing data.";
        $items[] = self::createRSSItem($title, $link, $description);
    } else {
        foreach ($insights as $insight) {
            $username_in_title = ($insight->instance->network == 'twitter' ? '@' : '') .
                $insight->instance->network_username;
            $title = str_replace(':', '', $insight->headline) . " (" . $username_in_title . ")";

            // Every query value is URL-encoded, including the username, which may contain
            // spaces or other reserved characters.
            $link = $base_url . '?u=' . urlencode($insight->instance->network_username) .
                '&n=' . urlencode($insight->instance->network) .
                '&d=' . urlencode(date('Y-m-d', strtotime($insight->date))) .
                '&s=' . urlencode($insight->slug);

            $description = $insight->headline . " " . $insight->text . '<br><a href="' . $link . '">Link</a>';
            $time = strtotime($insight->date);
            $items[] = self::createRSSItem($title, $link, $description, $time);
        }
    }

    $this->addToView('items', $items);
    $this->addToView('logged_in_user', htmlspecialchars($this->getLoggedInUser()));
    $this->addToView('rss_crawler_refresh_rate', htmlspecialchars($rss_crawler_refresh_rate));

    return $this->generateView();
}
/**
 * getAuthParameters() should return the URL-encoded "un" (email) and "as" (API secret)
 * query string for the fixture owner.
 */
public function testGetAuthParameters() {
    // NOTE(review): presumably the builders must stay referenced for the fixture rows to
    // exist during the assertion - confirm against buildData().
    $builders = $this->buildData();

    // The input must be the address that the expected "un" value encodes; the previous
    // masked literal could not produce un=me%40example.com.
    $auth_parameters = ThinkUpAuthAPIController::getAuthParameters('me@example.com');

    $this->assertEqual($auth_parameters, 'un=me%40example.com&as=1829cc1b13f920a05fb201e8d2a9e4dc58b669b1');
}