Beispiel #1
0
 /**
  * Re-processes every webpage affected by the changed translation memories.
  *
  * For each entry of $this->changedTranslationMemories (translation memory
  * id => list of string ids) the webpages whose effective translation memory
  * is that memory and which contain at least one of the listed strings are
  * collected. Each collected page is then run through a PageProcessor, and
  * the background_processes row of this process is updated with the
  * progress (total, current position and a status message).
  */
 public function run()
 {
     require_once 'inc/SweteSite.class.php';
     require_once 'inc/SweteWebpage.class.php';
     require_once 'inc/PageProcessor.php';
     require_once 'modules/tm/lib/XFTranslationMemory.php';
     $wpids = array();
     // Must be initialised once, outside the loop: it is a parallel array to
     // $wpids and is indexed with the same keys further down. Resetting it
     // per translation memory would misalign the two arrays as soon as more
     // than one translation memory has changed.
     $wplangs = array();
     $numPages = 0;
     foreach ($this->changedTranslationMemories as $tmid => $strids) {
         if (!$strids) {
             // Keep the "in (...)" clause syntactically valid when there are
             // no string ids for this translation memory.
             $strids = array(0);
         }
         $strids = implode(',', $strids);
         $sql = "select distinct wp.webpage_id, s.source_language\n\t\t\t\tfrom \n\t\t\t\t\twebpage_properties wp \n\t\t\t\t\tinner join webpage_strings ws on wp.webpage_id=ws.webpage_id\n\t\t\t\t\tinner join webpages w on wp.webpage_id=w.webpage_id\n\t\t\t\t\tinner join websites s on w.website_id=s.website_id\n\t\t\t\twhere\n\t\t\t\t\twp.effective_translation_memory_id='" . addslashes($tmid) . "' and\n\t\t\t\t\tws.string_id in (" . $strids . ")";
         $res = df_q($sql);
         $numPages += mysql_num_rows($res);
         while ($row = mysql_fetch_row($res)) {
             list($webpageId, $sourceLanguage) = $row;
             $wpids[] = $webpageId;
             $wplangs[] = $sourceLanguage;
         }
         @mysql_free_result($res);
     }
     // Publish the total amount of work before processing starts.
     df_q("update background_processes set \n\t\t\t\tstatus_message='" . addslashes('Updating webpages with new translations') . "',\n\t\t\t\tstatus_current_position=0,\n\t\t\t\tstatus_total='" . addslashes($numPages) . "'\n\t\t\t\twhere process_id='" . addslashes($this->getProcessId()) . "'");
     $count = 1;
     foreach ($wpids as $k => $webpageId) {
         $pageWrapper = SweteWebpage::loadById($webpageId, $wplangs[$k]);
         $pageWrapper->setSite($this->getSite($pageWrapper->getRecord()->val('website_id')));
         $tmid = $pageWrapper->getTranslationMemoryId(true);
         if ($tmid) {
             $tm = $this->getTranslationMemory($tmid);
             if ($tm) {
                 $processor = new PageProcessor();
                 $processor->site = $pageWrapper->getSite();
                 $processor->translationMemory = $tm;
                 $processor->page = $pageWrapper;
                 $processor->translateMinStatus = 3;
                 $processor->translateMaxStatus = 5;
                 $processor->logTranslationMisses = true;
                 $processor->savePage = true;
                 $processor->saveTranslationLogRecord = true;
                 $processor->process();
             }
         }
         // Progress is advanced for every page, including pages that were
         // skipped because no translation memory could be resolved.
         df_q("update background_processes set \n\t\t\t\tstatus_current_position='" . addslashes($count) . "'\n\t\t\t\twhere process_id='" . addslashes($this->getProcessId()) . "'");
         $count++;
     }
     df_q("update background_processes set \n\t\t\t\tstatus_message='" . addslashes('New translations successfully applied to ' . $numPages . ' pages.') . "',\n\t\t\t\tstatus_current_position=0,\n\t\t\t\tstatus_total='" . addslashes($numPages) . "'\n\t\t\t\twhere process_id='" . addslashes($this->getProcessId()) . "'");
 }
Beispiel #2
0
 /**
  * Stores the indexing plugins and the summarizer choice on this page
  * processor and sets the class-wide maximum description length.
  *
  * @param array $plugins an array of indexing plugins which might
  *     do further processing on the data handled by this page
  *     processor
  * @param int $max_description_len maximal length of a page summary;
  *     when it loosely equals NULL the MAX_DESCRIPTION_LEN constant is
  *     used instead
  * @param int $summarizer_option CRAWL_CONSTANT specifying what kind
  *      of summarizer to use: self::BASIC_SUMMARIZER or
  *      self::CENTROID_SUMMARIZER
  */
 function __construct($plugins = array(), $max_description_len = NULL, $summarizer_option = self::BASIC_SUMMARIZER)
 {
     $this->summarizer_option = $summarizer_option;
     $this->plugin_instances = $plugins;
     // Loose comparison on purpose: it mirrors the original fallback rule.
     self::$max_description_len = ($max_description_len != NULL)
         ? $max_description_len
         : MAX_DESCRIPTION_LEN;
 }
Beispiel #3
0
 /**
  * @brief Imports a node and all of its children into the database.
  *
  * When the node carries both a path and an HTTP status, the matching
  * @e webpages record for the current site is loaded (or created), its
  * "last checked" fields are updated and a @e webpage_check_log entry is
  * written. If the node also has content and content loading is enabled,
  * the page content is refreshed (unless the page is locked and locks are
  * not overridden), a @e webpage_refresh_log entry is written and, when
  * translation is enabled, the page is run through a PageProcessor.
  * Children of the node are imported recursively in every case.
  *
  * @param stdClass $node The root node to import.
  * @param Dataface_Record $parentPage A record of the @e webpages table
  * that represents the parent page of the current page.
  *
  * @throws Exception If saving the page or one of the log records fails.
  *
  * @see SiteCrawler for more information about nodes and the properties
  * they can have.
  */
 public function importNode(stdClass $node, $parentPage = null)
 {
     $page = null;
     if (isset($node->path) and isset($node->httpStatus)) {
         $page = df_get_record('webpages', array('webpage_url' => '=' . $node->path, 'website_id' => '=' . $this->site->getRecord()->val('website_id')));
         if (!$page) {
             // Unknown URL for this site: create the record first so that it
             // has a webpage_id the log entries below can refer to.
             $page = new Dataface_Record('webpages', array());
             $page->setValues(array('website_id' => $this->site->getRecord()->val('website_id'), 'webpage_url' => $node->path, 'active' => -1, 'posted_by' => $this->username, 'parent_id' => $parentPage ? $parentPage->val('webpage_id') : null, 'is_loaded' => 0));
             $res = $page->save();
             if (PEAR::isError($res)) {
                 throw new Exception($res->getMessage());
             }
             $this->pagesAdded[] = $page;
         } else {
             $this->pagesUpdated[] = $page;
         }
         $page->setValues(array('last_checked' => date('Y-m-d H:i:s'), 'last_checked_response_code' => $node->httpStatus, 'last_checked_content_type' => $node->contentType, 'last_checked_by' => $this->username));
         if (@$node->content and @$this->loadContent) {
             if ($page->val('locked') and !$this->overrideLocks) {
                 error_log("Skipping refresh of " . @$node->path . " because the page is locked.");
             } else {
                 $page->setValues(array('last_refresh' => date('Y-m-d H:i:s'), 'last_refresh_response_code' => $node->httpStatus, 'is_loaded' => 1, 'webpage_content' => $node->content));
                 // Log the refresh
                 $logEntry = new Dataface_Record('webpage_refresh_log', array());
                 $logEntry->setValues(array('webpage_id' => $page->val('webpage_id'), 'date_checked' => date('Y-m-d H:i:s'), 'response_code' => $node->httpStatus, 'content_type' => $node->contentType, 'content' => $node->content, 'checked_by' => $this->username));
                 $res = $logEntry->save();
                 if (PEAR::isError($res)) {
                     throw new Exception($res->getMessage());
                 }
                 if ($this->translate) {
                     $pageWrapper = new SweteWebpage($page);
                     $tmid = $pageWrapper->getTranslationMemoryId(true);
                     if ($tmid) {
                         $tm = $this->getTranslationMemory($tmid);
                         if ($tm) {
                             import('inc/PageProcessor.php');
                             $processor = new PageProcessor();
                             $processor->webpageRefreshLogId = $logEntry->val('refresh_log_id');
                             $processor->site = $this->site;
                             $processor->translationMemory = $tm;
                             $processor->page = $pageWrapper;
                             $processor->translateMinStatus = $this->translateMinStatus;
                             $processor->translateMaxStatus = $this->translateMaxStatus;
                             $processor->logTranslationMisses = $this->logTranslationMisses;
                             // The processor does not save the page itself;
                             // the page is saved once right after this block.
                             $processor->savePage = false;
                             $processor->saveTranslationLogRecord = true;
                             $processor->process();
                         }
                     }
                 }
             }
         }
         $res = $page->save();
         // A failed page save must not go unnoticed: report it the same way
         // as every other save in this method.
         if (PEAR::isError($res)) {
             throw new Exception($res->getMessage());
         }
         // Now log the check
         $logEntry = new Dataface_Record('webpage_check_log', array());
         $logEntry->setValues(array('webpage_id' => $page->val('webpage_id'), 'date_checked' => date('Y-m-d H:i:s'), 'response_code' => $node->httpStatus, 'content_type' => $node->contentType, 'checked_by' => $this->username));
         $res = $logEntry->save();
         if (PEAR::isError($res)) {
             throw new Exception($res->getMessage());
         }
     }
     if (isset($node->children) and is_array($node->children)) {
         foreach ($node->children as $child) {
             // $page is null here when this node had no path/httpStatus, so
             // the children are then imported without a parent page.
             $this->importNode($child, $page);
         }
     }
 }