Example #1
0
 /**
  * Assemble the usage event that describes the current request.
  *
  * @param $hookName string Name of the hook that triggered this call.
  * @param $args array Hook arguments; the first entry is used as the
  *  template manager.
  * @return array|false|null The usage event, false when the request
  *  yields no usage event (no page router, no context or no
  *  publication object), or null for the
  *  'FileManager::downloadFileFinished' hook.
  */
 protected function buildUsageEvent($hookName, $args)
 {
     // A finished download does not create a new event: the event for
     // that request was built before and handed to the other hooks.
     if ($hookName == 'FileManager::downloadFileFinished') {
         return null;
     }

     $app = Application::getApplication();
     $request = $app->getRequest();
     $router = $request->getRouter();
     /* @var $router PageRouter */
     $templateMgr = $args[0];
     /* @var $templateMgr TemplateManager */

     // Only page requests are of interest.
     if (!is_a($router, 'PageRouter')) {
         return false;
     }

     // A context is required.
     $context = $router->getContext($request);
     if (!$context) {
         return false;
     }

     // Let the concrete implementation describe the requested object.
     $eventData = $this->getUsageEventData($hookName, $args, $request, $router, $templateMgr, $context);
     list($pubObject, $downloadSuccess, $assocType, $idParams, $canonicalUrlPage, $canonicalUrlOp, $canonicalUrlParams) = $eventData;
     if (!$pubObject) {
         return false;
     }

     // Request time.
     $time = Core::getCurrentDate();

     // Document size and MIME type.
     if (!in_array($assocType, $this->getHtmlPageAssocTypes())) {
         // A file: take size and type from the publication object.
         $docSize = (int) $pubObject->getFileSize();
         $mimeType = $pubObject->getFileType();
     } else {
         // A plain HTML page, nothing is downloaded.
         $docSize = 0;
         $mimeType = 'text/html';
     }

     // Canonical URL. The configured server name is logged rather than
     // an alias, unless the request base url is a configured override.
     $canonicalUrl = $router->url($request, null, $canonicalUrlPage, $canonicalUrlOp, $canonicalUrlParams);
     $configBaseUrl = Config::getVar('general', 'base_url');
     $requestBaseUrl = $request->getBaseUrl();
     $requestUsesAlias = $requestBaseUrl !== $configBaseUrl
         && !in_array($requestBaseUrl, Config::getContextBaseUrls())
         && $requestBaseUrl !== Config::getVar('general', 'base_url[index]');
     if ($requestUsesAlias) {
         // Prefer the base url override of this context, if there is one.
         $baseUrlReplacement = Config::getVar('general', 'base_url[' . $context->getPath() . ']');
         if (!$baseUrlReplacement) {
             $baseUrlReplacement = $configBaseUrl;
         }
         $canonicalUrl = str_replace($requestBaseUrl, $baseUrlReplacement, $canonicalUrl);
     }

     // Identifiers, part 1: a system internal ID made of the site ID,
     // the context ID and the object specific ID parameters.
     $contextIdParam = 'c' . $context->getId();
     $siteId = $this->getUniqueSiteId();
     if (empty($siteId)) {
         // No site ID yet: create one and store it, so that objects
         // of this site stay identifiable even if the URL or other
         // externally influenced information changes.
         $siteId = uniqid();
         $this->updateSetting(0, 'uniqueSiteId', $siteId);
     }
     array_unshift($idParams, $siteId, $contextIdParam);
     $appName = $app->getName();
     $identifiers = array('other::' . $appName => $appName . ':' . implode('-', $idParams));

     // Identifiers, part 2: stored public identifiers (DOI, URN, ...)
     // of all enabled pubId plugins.
     if ($this->isPubIdObjectType($pubObject)) {
         $pubIdPlugins = PluginRegistry::loadCategory('pubIds', true, $context->getId());
         if (is_array($pubIdPlugins)) {
             foreach ($pubIdPlugins as $pubIdPlugin) {
                 if ($pubIdPlugin->getEnabled()) {
                     $pubId = $pubObject->getStoredPubId($pubIdPlugin->getPubIdType());
                     if ($pubId) {
                         $identifiers[$pubIdPlugin->getPubIdType()] = $pubId;
                     }
                 }
             }
         }
     }

     // Service URI.
     $serviceUri = $router->url($request, $context->getPath());

     // Remote address. The remote host is only recorded when the
     // server already provides it; no lookup is triggered here to
     // avoid the performance penalty.
     $ip = $request->getRemoteAddr();
     $host = isset($_SERVER['REMOTE_HOST']) ? $_SERVER['REMOTE_HOST'] : null;

     // User agent and referrer.
     $userAgent = $request->getUserAgent();
     $referrer = null;
     if (isset($_SERVER['HTTP_REFERER'])) {
         $referrer = $_SERVER['HTTP_REFERER'];
     }

     // Current user and the role IDs held site-wide and in this context.
     $user = $request->getUser();
     $roles = array();
     if ($user) {
         $roleDao = DAORegistry::getDAO('RoleDAO');
         /* @var $roleDao PKPRoleDAO */
         $rolesByContext = $roleDao->getByUserIdGroupedByContext($user->getId());
         foreach (array(CONTEXT_SITE, $context->getId()) as $roleContextId) {
             if (!isset($rolesByContext[$roleContextId])) {
                 continue;
             }
             foreach ($rolesByContext[$roleContextId] as $roleId => $role) {
                 $roles[] = $roleId;
             }
         }
     }

     // Simple classification of the request: any role other than
     // reader marks the access as administrative, and a bot request
     // overrides every other classification.
     $classification = null;
     if (!empty($roles) && array_diff($roles, array(ROLE_ID_READER))) {
         $classification = USAGE_EVENT_PLUGIN_CLASSIFICATION_ADMIN;
     }
     if ($request->isBot()) {
         $classification = USAGE_EVENT_PLUGIN_CLASSIFICATION_BOT;
     }
     // TODO: Classify LOCKSS or similar as 'internal' access.

     /*
      * How the event fields relate to Apache log parameters:
      *
      * Default parameters
      *   %h  remote hostname or IP    => ip, host
      *   %l  remote logname (identd)  => not supported, see user, roles
      *   %u  remote user              => not supported, see user, roles
      *   %t  request time             => time
      *   %r  query                    => derived: pubObject, assocType,
      *                                   canonicalUrl, identifiers,
      *                                   serviceUri, classification
      *   %s  status                   => not supported (always 200 here)
      *   %b  response size            => docSize
      *
      * Other common parameters
      *   %O  bytes sent               => not supported (cannot be
      *                                   reliably determined within PHP)
      *   %X  connection status        => downloadSuccess (not reliable!)
      *   %{ContentType}o              => mimeType
      *   %{User-agent}i               => userAgent
      *   %{Referer}i                  => referrer
      *
      * Some values, e.g. the time, may differ from what Apache would
      * log; those differences do not matter for our use cases.
      */
     return array(
         'time' => $time,
         'pubObject' => $pubObject,
         'assocType' => $assocType,
         'canonicalUrl' => $canonicalUrl,
         'mimeType' => $mimeType,
         'identifiers' => $identifiers,
         'docSize' => $docSize,
         'downloadSuccess' => $downloadSuccess,
         'serviceUri' => $serviceUri,
         'ip' => $ip,
         'host' => $host,
         'user' => $user,
         'roles' => $roles,
         'userAgent' => $userAgent,
         'referrer' => $referrer,
         'classification' => $classification,
     );
 }
Example #2
0
 /**
  * Check if the passed base url is part of
  * the passed url, based on the context base url
  * configuration. Both parameters can represent
  * full url (host plus path) or just the path,
  * but they have to be consistent.
  * @param $baseUrl string Full base url
  * or just its path info.
  * @param $url string Full url or just its
  * path info.
  * @return boolean|null True if the url starts with the base url,
  * false if it does not (or if only one of the two has a host
  * component), null if another context base url combined with its
  * context path equals the base url, so that it cannot be told
  * which base url is contained in the url.
  */
 function _checkBaseUrl($baseUrl, $url)
 {
     // Both values must either carry a host component or not.
     $baseUrlHasHost = (bool) parse_url($baseUrl, PHP_URL_HOST);
     $urlHasHost = (bool) parse_url($url, PHP_URL_HOST);
     if ($baseUrlHasHost !== $urlHasHost) {
         return false;
     }

     // The url has to start with the base url.
     if (strpos($url, $baseUrl) !== 0) {
         return false;
     }

     // index.php appears right after the base url, no more possible
     // paths. The comparison is strict on purpose: strpos() returns
     // false when there is no '/index.php' at all, and a loose
     // comparison would treat that as position 0, i.e. as a match
     // for a one character base url such as '/'.
     $indexPosition = strpos($url, '/index.php');
     if ($indexPosition !== false && $indexPosition === strlen($baseUrl) - 1) {
         return true;
     }

     // Still have to check if there is no other context base url
     // that combined with its context path is equal to this base
     // url. If it exists, we can't tell which base url is contained
     // in url. The configuration is only read here, where it is
     // actually needed.
     $contextBaseUrls = Config::getContextBaseUrls();
     foreach ($contextBaseUrls as $contextPath => $workingBaseUrl) {
         $urlToCheck = $workingBaseUrl . '/' . $contextPath;
         if (!$baseUrlHasHost) {
             $urlToCheck = parse_url($urlToCheck, PHP_URL_PATH);
         }
         if ($baseUrl == $urlToCheck) {
             return null;
         }
     }
     return true;
 }
 /**
  * Build a usage event for the current request.
  *
  * @param $hookName string Name of the hook that triggered this call.
  * @param $args array Hook arguments. For 'TemplateManager::display'
  *  the first entry is used as the template manager; for the
  *  handler hooks the first entry is used as the article or issue
  *  and the second one as the accessed publication object.
  * @return array|false|null The usage event data, false if the
  *  request does not represent a usage event, or null for the
  *  'FileManager::downloadFileFinished' hook.
  */
 function _buildUsageEvent($hookName, $args)
 {
     // Finished downloading a file?
     if ($hookName == 'FileManager::downloadFileFinished') {
         // The usage event for this request is already build and
         // passed to any other registered hook.
         return null;
     }
     $application =& Application::getApplication();
     $request =& $application->getRequest();
     $router =& $request->getRouter();
     /* @var $router PageRouter */
     $templateMgr =& $args[0];
     /* @var $templateMgr TemplateManager */
     // We are just interested in page requests.
     if (!is_a($router, 'PageRouter')) {
         return false;
     }
     // Check whether we are in journal context.
     $journal =& $router->getContext($request);
     if (!$journal) {
         return false;
     }
     // Prepare request information.
     $downloadSuccess = false;
     $idParams = array();
     $canonicalUrlParams = array();
     switch ($hookName) {
         // Journal index, article abstract, issue and HTML galley pages.
         case 'TemplateManager::display':
             $page = $router->getRequestedPage($request);
             $op = $router->getRequestedOp($request);
             // First check for a journal index page view.
             if (($page == 'index' || empty($page)) && $op == 'index') {
                 $pubObject =& $templateMgr->get_template_vars('currentJournal');
                 if (is_a($pubObject, 'Journal')) {
                     $assocType = ASSOC_TYPE_JOURNAL;
                     $canonicalUrlOp = '';
                     $downloadSuccess = true;
                     break;
                 } else {
                     return false;
                 }
             }
             // We are interested in access to the article abstract/galley, issue view page.
             $wantedPages = array('article', 'issue');
             $wantedOps = array('view', 'articleView');
             if (!in_array($page, $wantedPages) || !in_array($op, $wantedOps)) {
                 return false;
             }
             $issue =& $templateMgr->get_template_vars('issue');
             $galley =& $templateMgr->get_template_vars('galley');
             /* @var $galley ArticleGalley */
             $article =& $templateMgr->get_template_vars('article');
             // If there is no published object, there is no usage event.
             if (!$issue && !$galley && !$article) {
                 return false;
             }
             if ($galley) {
                 if ($galley->isHTMLGalley()) {
                     // The galley identifiers are built from the
                     // article. The check above only rejects requests
                     // where issue, galley and article are all
                     // missing, so make sure the article is there.
                     if (!$article) {
                         return false;
                     }
                     $pubObject =& $galley;
                     $assocType = ASSOC_TYPE_GALLEY;
                     $canonicalUrlParams = array($article->getBestArticleId(), $pubObject->getBestGalleyId($journal));
                     $idParams = array('a' . $article->getId(), 'g' . $pubObject->getId());
                 } else {
                     // This is an access to an intermediary galley page which we
                     // do not count.
                     return false;
                 }
             } else {
                 if ($article) {
                     $pubObject =& $article;
                     $assocType = ASSOC_TYPE_ARTICLE;
                     $canonicalUrlParams = array($pubObject->getBestArticleId($journal));
                     $idParams = array('a' . $pubObject->getId());
                 } else {
                     $pubObject =& $issue;
                     $assocType = ASSOC_TYPE_ISSUE;
                     $canonicalUrlParams = array($pubObject->getBestIssueId($journal));
                     $idParams = array('i' . $pubObject->getId());
                 }
             }
             // The article, issue and HTML/remote galley pages do not download anything.
             $downloadSuccess = true;
             $canonicalUrlOp = 'view';
             break;
         // Remote galley.
         case 'ArticleHandler::viewRemoteGalley':
             $article =& $args[0];
             $pubObject =& $args[1];
             $assocType = ASSOC_TYPE_GALLEY;
             $canonicalUrlParams = array($article->getBestArticleId(), $pubObject->getBestGalleyId($journal));
             $idParams = array('a' . $article->getId(), 'g' . $pubObject->getId());
             $downloadSuccess = true;
             $canonicalUrlOp = 'view';
             break;
         // Article galley (except for HTML and remote galley).
         case 'ArticleHandler::viewFile':
         case 'ArticleHandler::downloadFile':
             $pubObject =& $args[1];
             $assocType = ASSOC_TYPE_GALLEY;
             $canonicalUrlOp = 'download';
             $article =& $args[0];
             $canonicalUrlParams = array($article->getBestArticleId(), $pubObject->getBestGalleyId($journal));
             $idParams = array('a' . $article->getId(), 'g' . $pubObject->getId());
             break;
         // Supplementary file.
         case 'ArticleHandler::downloadSuppFile':
             $pubObject =& $args[1];
             $assocType = ASSOC_TYPE_SUPP_FILE;
             $canonicalUrlOp = 'downloadSuppFile';
             $article =& $args[0];
             $canonicalUrlParams = array($article->getBestArticleId(), $pubObject->getBestSuppFileId($journal));
             $idParams = array('a' . $article->getId(), 's' . $pubObject->getId());
             break;
         // Issue galley.
         case 'IssueHandler::viewFile':
             $pubObject =& $args[1];
             $assocType = ASSOC_TYPE_ISSUE_GALLEY;
             $canonicalUrlOp = 'download';
             $issue =& $args[0];
             $canonicalUrlParams = array($issue->getBestIssueId(), $pubObject->getBestGalleyId($journal));
             $idParams = array('i' . $issue->getId(), 'ig' . $pubObject->getId());
             break;
         default:
             // Why are we called from an unknown hook?
             assert(false);
             // Assertions may be disabled. Without a publication
             // object and association type no usage event can be
             // built, so stop here instead of continuing with
             // undefined variables.
             return false;
     }
     // Timestamp.
     $time = Core::getCurrentDate();
     // Actual document size, MIME type.
     $htmlPageAssocTypes = array(ASSOC_TYPE_ARTICLE, ASSOC_TYPE_ISSUE, ASSOC_TYPE_JOURNAL);
     if (in_array($assocType, $htmlPageAssocTypes)) {
         // Article abstract, issue view or journal index page.
         $docSize = 0;
         $mimeType = 'text/html';
     } else {
         // Files.
         $docSize = (int) $pubObject->getFileSize();
         $mimeType = $pubObject->getFileType();
     }
     // Canonical URL.
     switch ($assocType) {
         case ASSOC_TYPE_ISSUE:
         case ASSOC_TYPE_ISSUE_GALLEY:
             $canonicalUrlPage = 'issue';
             break;
         case ASSOC_TYPE_ARTICLE:
         case ASSOC_TYPE_GALLEY:
         case ASSOC_TYPE_SUPP_FILE:
             $canonicalUrlPage = 'article';
             break;
         case ASSOC_TYPE_JOURNAL:
             $canonicalUrlPage = 'index';
             break;
     }
     $canonicalUrl = $router->url($request, null, $canonicalUrlPage, $canonicalUrlOp, $canonicalUrlParams);
     // Make sure we log the server name and not aliases.
     $configBaseUrl = Config::getVar('general', 'base_url');
     $requestBaseUrl = $request->getBaseUrl();
     if ($requestBaseUrl !== $configBaseUrl) {
         // Make sure it's not an url override (no alias on that case).
         if (!in_array($requestBaseUrl, Config::getContextBaseUrls()) && $requestBaseUrl !== Config::getVar('general', 'base_url[index]')) {
             // Alias found, replace it by base_url from config file.
             // Make sure we use the correct base url override value for the context, if any.
             $baseUrlReplacement = Config::getVar('general', 'base_url[' . $journal->getPath() . ']');
             if (!$baseUrlReplacement) {
                 $baseUrlReplacement = $configBaseUrl;
             }
             $canonicalUrl = str_replace($requestBaseUrl, $baseUrlReplacement, $canonicalUrl);
         }
     }
     // Public identifiers.
     // 1) A unique OJS-internal ID that will help us to easily attribute
     //    statistics to a specific publication object.
     array_unshift($idParams, 'j' . $journal->getId());
     $siteId = $this->getUniqueSiteId();
     if (empty($siteId)) {
         // Create a globally unique, persistent site ID
         // so that we can uniquely identify publication
         // objects from this site, even if the URL or any
         // other externally influenced information changes.
         $siteId = uniqid();
         $this->updateSetting(0, 'uniqueSiteId', $siteId);
     }
     array_unshift($idParams, $siteId);
     $ojsId = 'ojs:' . implode('-', $idParams);
     $identifiers = array('other::ojs' => $ojsId);
     // 2) Standardized public identifiers, e.g. DOI, URN, etc.
     //    Issue galleys and journals are skipped here.
     if (!is_a($pubObject, 'IssueGalley') && !is_a($pubObject, 'Journal')) {
         $pubIdPlugins =& PluginRegistry::loadCategory('pubIds', true, $journal->getId());
         if (is_array($pubIdPlugins)) {
             foreach ($pubIdPlugins as $pubIdPlugin) {
                 if (!$pubIdPlugin->getEnabled()) {
                     continue;
                 }
                 $pubId = $pubIdPlugin->getPubId($pubObject);
                 if ($pubId) {
                     $identifiers[$pubIdPlugin->getPubIdType()] = $pubId;
                 }
             }
         }
     }
     // Service URI.
     $serviceUri = $router->url($request, $journal->getPath());
     // IP and Host.
     $ip = $request->getRemoteAddr();
     $host = null;
     if (isset($_SERVER['REMOTE_HOST'])) {
         // We do NOT actively look up the remote host to
         // avoid the performance penalty. We only set the remote
         // host if we get it "for free".
         $host = $_SERVER['REMOTE_HOST'];
     }
     // HTTP user agent.
     $userAgent = $request->getUserAgent();
     // HTTP referrer.
     $referrer = isset($_SERVER['HTTP_REFERER']) ? $_SERVER['HTTP_REFERER'] : null;
     // User and roles (site-wide roles plus the roles in this journal).
     $user =& $request->getUser();
     $roles = array();
     if ($user) {
         $roleDao =& DAORegistry::getDAO('RoleDAO');
         $rolesByContext =& $roleDao->getByUserIdGroupedByContext($user->getId());
         foreach (array(CONTEXT_SITE, $journal->getId()) as $context) {
             if (isset($rolesByContext[$context])) {
                 foreach ($rolesByContext[$context] as $role) {
                     $roles[] = $role->getRoleId();
                 }
             }
         }
     }
     // Try a simple classification of the request.
     $classification = null;
     if (!empty($roles)) {
         // Access by editors, authors, etc.
         $internalRoles = array_diff($roles, array(ROLE_ID_READER));
         if (!empty($internalRoles)) {
             $classification = USAGE_EVENT_PLUGIN_CLASSIFICATION_ADMIN;
         }
     }
     if ($request->isBot()) {
         // The bot classification overwrites other classifications.
         $classification = USAGE_EVENT_PLUGIN_CLASSIFICATION_BOT;
     }
     // TODO: Classify LOCKSS or similar as 'internal' access.
     /*
      * Comparison of our event log format with Apache log parameters...
      *
      * 1) default parameters:
      * %h: remote hostname or IP => $ip, $host
      * %l: remote logname (identd) => not supported, see $user, $roles instead
      * %u: remote user => not supported, see $user, $roles instead
      * %t: request time => $time
      * %r: query => derived objects: $pubObject, $assocType, $canonicalUrl, $identifiers, $serviceUri, $classification
      * %s: status => not supported (always 200 in our case)
      * %b: response size => $docSize
      *
      * 2) other common parameters
      * %O: bytes sent => not supported (cannot be reliably determined from within PHP)
      * %X: connection status => $downloadSuccess (not reliable!)
      * %{ContentType}o: => $mimeType
      * %{User-agent}i: => $userAgent
      * %{Referer}i: => $referrer
      *
      * Several items, e.g. time etc., may differ from what Apache
      * would actually log. But the differences do not matter for our use
      * cases.
      */
     // Collect all information into an array.
     $usageEvent = compact('time', 'pubObject', 'assocType', 'canonicalUrl', 'mimeType', 'identifiers', 'docSize', 'downloadSuccess', 'serviceUri', 'ip', 'host', 'user', 'roles', 'userAgent', 'referrer', 'classification');
     return $usageEvent;
 }