Esempio n. 1
 /**
  * Loads the root page in the configured language and hands the value of the
  * configured inheritable property to SweteSite::calculateEffectivePropertyToTree().
  *
  * Reads $this->rootPageId, $this->lang, $this->property and $this->inheritVal.
  *
  * @throws Exception if the root page cannot be loaded.
  */
 public function run()
 {
     $page = SweteWebpage::loadById($this->rootPageId, $this->lang);
     if (!$page) {
         throw new Exception("Failed to load page :" . $this->rootPageId . " language " . $this->lang);
     }

     // The result of this call is discarded, exactly as in the original.
     // NOTE(review): presumably it is made for a side effect — confirm.
     $page->getInheritableProperty($this->property, true, $this->inheritVal);

     // The site object is not used below; the call is kept in case it has
     // loading side effects — TODO confirm and remove if it has none.
     $page->getSite();

     SweteSite::calculateEffectivePropertyToTree(
         $this->property,
         $page,
         $page->getInheritableProperty($this->property, false, $this->inheritVal)
     );
 }
Esempio n. 2
 // Finds the webpages affected by the changed translation memories, reports
 // progress through the background_processes table, and prepares a
 // PageProcessor for each affected page.
 // NOTE(review): this excerpt has unbalanced braces, and no visible statement
 // runs the configured PageProcessor or increments $count. Lines were
 // presumably dropped from the listing; confirm against the full source.
 public function run()
     require_once 'inc/SweteSite.class.php';
     require_once 'inc/SweteWebpage.class.php';
     require_once 'inc/PageProcessor.php';
     require_once 'modules/tm/lib/XFTranslationMemory.php';
     // Webpage ids gathered across all changed translation memories.
     $wpids = array();
     // Running total of matching rows; used as status_total below.
     $numPages = 0;
     foreach ($this->changedTranslationMemories as $tmid => $strids) {
         // Fall back to a single id of 0 so the "in (...)" list below is never empty.
         if (!$strids) {
             $strids = array(0);
         $strids = implode(',', $strids);
         // Select pages whose effective translation memory is $tmid and that
         // use at least one of the listed strings, with the site's source language.
         $sql = "select distinct wp.webpage_id, s.source_language\n\t\t\t\tfrom \n\t\t\t\t\twebpage_properties wp \n\t\t\t\t\tinner join webpage_strings ws on wp.webpage_id=ws.webpage_id\n\t\t\t\t\tinner join webpages w on wp.webpage_id=w.webpage_id\n\t\t\t\t\tinner join websites s on w.website_id=s.website_id\n\t\t\t\twhere\n\t\t\t\t\twp.effective_translation_memory_id='" . addslashes($tmid) . "' and\n\t\t\t\t\tws.string_id in (" . $strids . ")";
         $res = df_q($sql);
         $numPages += mysql_num_rows($res);
         // NOTE(review): $wplangs appears to be re-initialised on every pass of
         // this loop while $wpids keeps growing, so the $wplangs[$k] lookup
         // further down may not line up with $wpids once more than one
         // translation memory is processed — confirm.
         $wplangs = array();
         while ($row = mysql_fetch_row($res)) {
             list($webpageId, $sourceLanguage) = $row;
             $wpids[] = $webpageId;
             $wplangs[] = $sourceLanguage;
     // Publish the initial progress state: position 0 out of $numPages.
     df_q("update background_processes set \n\t\t\t\tstatus_message='" . addslashes('Updating webpages with new translations') . "',\n\t\t\t\tstatus_current_position=0,\n\t\t\t\tstatus_total='" . addslashes($numPages) . "'\n\t\t\t\twhere process_id='" . addslashes($this->getProcessId()) . "'");
     $count = 1;
     foreach ($wpids as $k => $webpageId) {
         // Load the page in the source language recorded alongside its id.
         $pageWrapper = SweteWebpage::loadById($webpageId, $wplangs[$k]);
         $tmid = $pageWrapper->getTranslationMemoryId(true);
         if ($tmid) {
             $tm = $this->getTranslationMemory($tmid);
             if ($tm) {
                 // Configure a processor for this page: it saves the page and
                 // logs translation misses.
                 $processor = new PageProcessor();
                 $processor->site = $pageWrapper->getSite();
                 $processor->translationMemory = $tm;
                 $processor->page = $pageWrapper;
                 $processor->translateMinStatus = 3;
                 $processor->translateMaxStatus = 5;
                 $processor->logTranslationMisses = true;
                 $processor->savePage = true;
                 $processor->saveTranslationLogRecord = true;
         // Record the current position in the page list for this process.
         df_q("update background_processes set \n\t\t\t\tstatus_current_position='" . addslashes($count) . "'\n\t\t\t\twhere process_id='" . addslashes($this->getProcessId()) . "'");
     // Final status message; the current position is written back as 0.
     df_q("update background_processes set \n\t\t\t\tstatus_message='" . addslashes('New translations successfully applied to ' . $numPages . ' pages.') . "',\n\t\t\t\tstatus_current_position=0,\n\t\t\t\tstatus_total='" . addslashes($numPages) . "'\n\t\t\t\twhere process_id='" . addslashes($this->getProcessId()) . "'");
Esempio n. 3
  *	Tests approve() on a job for a static site, where a webpage was added to the job and one translation for that webpage was submitted.
 // Builds a job on the static site with one webpage and one submitted
 // translation, then checks the job status and that each job webpage's
 // translation memory contains the translation.
 // NOTE(review): braces are missing from this excerpt.
 function testApproveStaticWebpage()
     $job = SweteJob::createJob($this->staticSite);
     $username = '******';
     // Register a page with a single string and attach its 'en' version to the job.
     $pg = $this->addWebpageForStaticSite('page', array('Test String'));
     $job->addWebpage(SweteWebpage::loadById($pg->val('webpage_id'), 'en'));
     // Record a submitted translation of that string in the job's translation memory.
     $tm = $job->getTranslationMemory();
     // NOTE(review): $trec is not used in the visible lines.
     $trec = $tm->setTranslationStatus('Test String', 'Test String Translated', XFTranslationMemory::TRANSLATION_SUBMITTED, $username);
     // NOTE(review): the next two assertions expect different statuses with no
     // call between them. The step that closes the job (presumably a call to
     // $job->approve(...)) is not visible in this excerpt — confirm.
     $this->assertEquals(SweteJob::JOB_STATUS_NEW, $job->getRecord()->val('job_status'));
     $this->assertEquals(SweteJob::JOB_STATUS_CLOSED, $job->getRecord()->val('job_status'));
     // The translation memory loaded for each job webpage and the job's
     // language pair must contain the translation.
     foreach ($job->getWebpageRecords() as $webpage) {
         $tm = XFTranslationMemory::loadTranslationMemoryFor($webpage, $job->getRecord()->val('source_language'), $job->getRecord()->val('destination_language'));
         $this->assertTrue($tm->containsTranslation('Test String', 'Test String Translated'));
Esempio n. 4
 // Translates the content of $this->page with the site's proxy writer, records
 // the strings found on the page, stores the translated content on the
 // destination-language page, sets the page status from the hit/miss
 // statistics, and optionally logs translation misses and saves the page.
 // Results are exposed through $this->translationStats and
 // $this->translationMissLogRecord.
 // NOTE(review): this excerpt has unbalanced braces and several empty-looking
 // "if" bodies; statements such as early returns or loop skips were presumably
 // dropped from the listing. Confirm against the full source.
 public function process()
     // Reset the result fields left over from any previous run.
     $this->translationStats = null;
     $this->translationMissLogRecord = null;
     $this->translatedPage = null;
     $proxyWriter = $this->site->getProxyWriter();
     $pageWrapper = $this->page;
     $page = $pageWrapper->getRecord();
     // Take the translation memory id from the page unless a translation
     // memory was supplied explicitly.
     $tmid = null;
     if (!isset($this->translationMemory)) {
         $tmid = $pageWrapper->getTranslationMemoryId(true);
     } else {
         $tmid = $this->translationMemory->getRecord()->val('translation_memory_id');
     $translatedContent = null;
     $untranslatedContent = $page->val('webpage_content');
     if (!trim($untranslatedContent)) {
         // There is nothing to process on this page.
     if ($tmid or $this->translationMemory) {
         // Prefer the explicitly supplied translation memory; otherwise look it up by id.
         if ($this->translationMemory) {
             $tm = $this->translationMemory;
         } else {
             $tm = $this->getTranslationMemory($tmid);
         if ($tm) {
             // The translation memory must translate from the page's own language.
             if ($tm->getSourceLanguage() != $pageWrapper->getLanguage()) {
                 throw new Exception("Translation memory language does not match the record language.  Translation memory source language is " . $tm->getSourceLanguage() . " but the page language is " . $pageWrapper->getLanguage() . '.');
             // NOTE(review): $translationStats is not assigned before this call;
             // presumably translateHtml() fills it by reference — confirm.
             $translatedContent = $proxyWriter->translateHtml($untranslatedContent, $translationStats, $this->logTranslationMisses);
             $this->translationStats = $translationStats;
             $page->setValues(array('last_translation_memory_applied' => date('Y-m-d H:i:s'), 'last_translation_memory_misses' => $translationStats['misses'], 'last_translation_memory_hits' => $translationStats['matches']));
             // Let's record the strings in this page.
             // Existing rows for this page are removed first.
             $res = df_q("delete from webpage_strings where webpage_id='" . addslashes($page->val('webpage_id')) . "'");
             if ($proxyWriter->lastStrings) {
                 $sqlpre = "insert into webpage_strings (webpage_id,string_id) values ";
                 $sql = array();
                 $wpid = $page->val('webpage_id');
                 foreach ($proxyWriter->lastStrings as $str) {
                     // Blank strings are tested for here; the body of this branch
                     // is not visible in the excerpt.
                     if (!trim($str)) {
                     if (preg_match('/^[^\\w]+$/', trim($str))) {
                         // This is to skip any strings that contain only
                         // non-word characters (e.g. punctuation). Digits count
                         // as word characters for \w, so numbers do not match.
                     // NOTE(review): $params is not assigned before this call;
                     // presumably encode() fills it by reference — confirm.
                     $encStr = TMTools::encode($str, $params);
                     $strRec = XFTranslationMemory::addString($encStr, $tm->getSourceLanguage());
                     $sql[] = '(' . $wpid . ',' . $strRec->val('string_id') . ')';
                 // Combine the collected value tuples into one insert statement.
                 // NOTE(review): no statement executing $sql is visible in this excerpt.
                 $sql = $sqlpre . implode(',', $sql);
             // Store the translated content on the destination-language version of the page.
             $translatedPage = SweteWebpage::loadById($page->val('webpage_id'), $this->site->getDestinationLanguage());
             $translatedPage->getRecord()->setValue('webpage_content', $translatedContent);
             $res = $translatedPage->getRecord()->save();
             if (PEAR::isError($res)) {
                 throw new Exception(mysql_error(df_db()));
             // Decide the page status. Content identical to the last approved
             // version is treated as approved.
             $lastApproved = $translatedPage->getLastVersionWithStatus(SweteWebpage::STATUS_APPROVED);
             if ($lastApproved and $lastApproved->val('webpage_content') == $translatedContent) {
                 $page->setValue('webpage_status', SweteWebpage::STATUS_APPROVED);
             } else {
                 if ($translationStats['matches'] > 0 and $translationStats['misses'] == 0) {
                     // We have perfect matches in what we are supposed to be translating
                     // We are either approving this page or we are marking it pending approval
                     if ($translatedPage->getAutoApprove(true)) {
                         $page->setValue('webpage_status', SweteWebpage::STATUS_APPROVED);
                         $lastApproved = $translatedPage->setStatus(SweteWebpage::STATUS_APPROVED);
                     } else {
                         $page->setValue('webpage_status', SweteWebpage::STATUS_PENDING_APPROVAL);
                 } else {
                     // At least one miss marks the page as changed; with no
                     // matches and no misses the status is cleared.
                     if ($translationStats['misses'] > 0) {
                         $page->setValue('webpage_status', SweteWebpage::STATUS_CHANGED);
                     } else {
                         $page->setValue('webpage_status', null);
             // Build a translation_miss_log record for each logged miss.
             if ($this->logTranslationMisses and @$translationStats['log']) {
                 foreach ($translationStats['log'] as $str) {
                     $tlogEntry = new Dataface_Record('translation_miss_log', array());
                     $nstr = TMTools::normalize($str);
                     $estr = TMTools::encode($str, $junk);
                     $hstr = md5($estr);
                     // Reuse the existing string record, or create it if it is not found.
                     $strRec = XFTranslationMemory::findString($estr, $this->site->getSourceLanguage());
                     if (!$strRec) {
                         $strRec = XFTranslationMemory::addString($estr, $this->site->getSourceLanguage());
                     $tlogEntry->setValues(array('string' => $str, 'normalized_string' => $nstr, 'encoded_string' => $estr, 'string_hash' => $hstr, 'date_inserted' => date('Y-m-d H:i:s'), 'webpage_id' => $page->val('webpage_id'), 'website_id' => $page->val('website_id'), 'source_language' => $this->site->getSourceLanguage(), 'destination_language' => $this->site->getDestinationLanguage(), 'translation_memory_id' => $tmid, 'string_id' => $strRec->val("string_id")));
                     if (isset($this->webpageRefreshLogId)) {
                         $tlogEntry->setValue('webpage_refresh_log_id', $this->webpageRefreshLogId);
                     if ($this->saveTranslationLogRecord) {
                         $res = $tlogEntry->save();
                         if (PEAR::isError($res)) {
                             //throw new Exception($res->getMessage());
                             // This will throw an error if there is a duplicate... we don't care... we're not interested in duplicates
                     // Overwritten on each pass, so only the most recent entry is kept here.
                     $this->translationMissLogRecord = $tlogEntry;
             // Persist the status and statistics set on the source page record.
             if ($this->savePage) {
                 $res = $page->save();
                 if (PEAR::isError($res)) {
                     throw new Exception($res->getMessage());
Esempio n. 5
  * Compiles the job inputs into its final form so that it can be worked on.  Before
  * this step the job just has a loose set of input webpages, webpage strings, and translation
  * misses.  This will grab all of the resources that it needs to be able to 
  * present the job to a translator.  This includes loading all resources for all
  * pages used into the data structure so that the job doesn't depend on outside
  * factors.
 // Builds one job_translatable record per input of this job, in three passes:
 // (1) the job's input webpages, (2) webpages referenced by the job's
 // translation misses, and (3) logged HTTP requests referenced by translation
 // misses. Each record gets the processed page contents, the translatable
 // strings, a word count and the previous translations. Finally the job's total
 // word count is stored and the job is flagged as compiled.
 // Throws Exception when a page, HTTP request or translation memory cannot be
 // found, when a save fails, or when the job has no translatable content.
 // NOTE(review): closing braces are missing from this excerpt; other short
 // statements may have been dropped as well — confirm against the full source.
 public function compile()
     require_once 'inc/SweteJobPageSucker.php';
     try {
         // Translation misses attached to this job that belong to a webpage,
         // grouped by webpage id.
         $res = SweteDb::q("select tml.webpage_id, tml.translation_miss_log_id, tml.string\n\t\t\t\tfrom \n\t\t\t\t\ttranslation_miss_log tml \n\t\t\t\t\tinner join job_inputs_translation_misses jitm on jitm.translation_miss_log_id=tml.translation_miss_log_id\n\t\t\t\t\twhere jitm.job_id='" . addslashes($this->_rec->val('job_id')) . "' and\n\t\t\t\t\ttml.webpage_id is not null");
         $missedWebpageIds = array();
         while ($row = mysql_fetch_assoc($res)) {
             $missedWebpageIds[$row['webpage_id']][] = $row;
         // 1. Get all of the webpages
         $res = SweteDb::q("select webpage_id from job_inputs_webpages where job_id='" . addslashes($this->_rec->val('job_id')) . "'");
         $wpids = array();
         while ($row = mysql_fetch_row($res)) {
             $wpids[] = $row[0];
         $site = $this->getSite();
         $proxyWriter = $site->getProxyWriter();
         // Accumulates the word counts of all translatables created below.
         $jobWordCount = 0;
         foreach ($wpids as $webpageId) {
             $webpage = SweteWebpage::loadById($webpageId, $this->_rec->val('source_language'));
             if (!$webpage) {
                 throw new Exception("Could not find webpage with id {$webpageId}");
             // Use a page sucker to suck all of the resources used by this webpage.
             $pageSucker = new SweteJobPageSucker($this);
             $pageContent = $webpage->getRecord()->val('webpage_content');
             $pageUrl = $site->getSiteUrl() . $webpage->getRecord()->val('webpage_url');
             $pageContent = $pageSucker->processHtml($pageContent, $pageUrl);
             $translatable = new Dataface_Record('job_translatable', array());
             $translatable->setValues(array('job_id' => $this->_rec->val('job_id'), 'content_type' => $webpage->getRecord()->val('last_checked_content_type'), 'full_contents' => $pageContent->save(), 'webpage_id' => $webpageId, 'source_url' => $pageUrl));
             //strings from static sites
             $strings = array();
             $res = SweteDb::q("select `string` from job_inputs_webpages_strings where job_id='" . addslashes($this->_rec->val('job_id')) . "' and webpage_id='" . addslashes($webpageId) . "'");
             while ($row = mysql_fetch_row($res)) {
                 $strings[] = $row[0];
             // Lets see if there are any other strings that were added individually to this page.
             if (isset($missedWebpageIds[$webpageId])) {
                 foreach ($missedWebpageIds[$webpageId] as $row) {
                     $strings[] = $row['string'];
             // We need to collapse duplicate strings
             // Strings are compared by their normalized, encoded form; the
             // first original spelling of each is kept.
             $uniqueStringIndex = array();
             $uniqueStrings = array();
             foreach ($strings as $k => $str) {
                 $nstr = TMTools::normalize($str);
                 $estr = TMTools::encode($nstr, $temp);
                 if (!isset($uniqueStringIndex[$estr])) {
                     $uniqueStrings[] = $str;
                     $uniqueStringIndex[$estr] = 1;
             $strings = $uniqueStrings;
             // Each string is wrapped in its own <div>.
             $translatable->setValue('translatable_contents', '<div>' . implode('</div><div>', $strings) . '</div>');
             //set the word count
             $pageWordCount = self::getPageWordCount($strings);
             $jobWordCount += $pageWordCount;
             $translatable->setValue('word_count', $pageWordCount);
             // Now we need to get the previous translations
             $tmid = $webpage->getTranslationMemoryId(true);
             $tm = XFTranslationMemory::loadTranslationMemoryById($tmid);
             if (!$tm) {
                 throw new Exception("Could not find translation memory with id {$tmid}");
             $dict = $this->extractDictionaryFromHtml($tm, $webpage->getRecord()->val('webpage_content'));
             $translatable->setValue('previous_translations', serialize($dict));
             $res = $translatable->save();
             if (PEAR::isError($res)) {
                 throw new Exception($res->getMessage(), $res->getCode());
         // Add the remainder of the missed webpages.
         // NOTE(review): as visible here, this loop walks every entry of
         // $missedWebpageIds, including pages already handled in the loop
         // above. A skip or unset may have been dropped from the excerpt — confirm.
         foreach ($missedWebpageIds as $webpageId => $strings) {
             $webpage = SweteWebpage::loadById($webpageId, $this->_rec->val('source_language'));
             if (!$webpage) {
                 throw new Exception("Could not find webpage with id {$webpageId}");
             // Use a page sucker to suck all of the resources used by this webpage.
             $pageSucker = new SweteJobPageSucker($this);
             $pageContent = $webpage->getRecord()->val('webpage_content');
             $pageUrl = $site->getSiteUrl() . $webpage->getRecord()->val('webpage_url');
             $pageContent = $pageSucker->processHtml($pageContent, $pageUrl);
             $translatable = new Dataface_Record('job_translatable', array());
             $translatable->setValues(array('job_id' => $this->_rec->val('job_id'), 'content_type' => $webpage->getRecord()->val('last_checked_content_type'), 'full_contents' => $pageContent->save(), 'webpage_id' => $webpageId, 'source_url' => $pageUrl));
             // We need to collapse duplicate strings
             $uniqueStringIndex = array();
             $uniqueStrings = array();
             foreach ($strings as $k => $missedstr) {
                 $str = $missedstr['string'];
                 $nstr = TMTools::normalize($str);
                 // NOTE(review): unlike the first loop, the encoded string is
                 // normalized a second time before being used as the key — confirm
                 // whether the difference is intended.
                 $estr = TMTools::normalize(TMTools::encode($nstr, $temp));
                 if (!isset($uniqueStringIndex[$estr])) {
                     $uniqueStrings[] = $str;
                     $uniqueStringIndex[$estr] = 1;
             $strings = $uniqueStrings;
             $translatable->setValue('translatable_contents', '<div>' . implode('</div><div>', $strings) . '</div>');
             //set the word count
             $pageWordCount = self::getPageWordCount($strings);
             $jobWordCount += $pageWordCount;
             $translatable->setValue('word_count', $pageWordCount);
             // Now we need to get the previous translations
             $tmid = $webpage->getTranslationMemoryId(true);
             $tm = XFTranslationMemory::loadTranslationMemoryById($tmid);
             if (!$tm) {
                 throw new Exception("Could not find translation memory with id {$tmid}");
             $dict = $this->extractDictionaryFromHtml($tm, $webpage->getRecord()->val('webpage_content'));
             $translatable->setValue('previous_translations', serialize($dict));
             $res = $translatable->save();
             if (PEAR::isError($res)) {
                 throw new Exception($res->getMessage(), $res->getCode());
         // 2. Get all of the http requests without associated webpages.
         // NOTE(review): the query itself does not filter on tml.webpage_id, so
         // it selects every miss of the job that has an http_request_log row.
         $res = SweteDb::q("select htl.http_request_log_id, tml.translation_miss_log_id, tml.string, htl.translation_memory_id\n\t\t\t\tfrom \n\t\t\t\t\ttranslation_miss_log tml \n\t\t\t\t\tinner join http_request_log htl on tml.http_request_log_id=htl.http_request_log_id \n\t\t\t\t\tinner join job_inputs_translation_misses jitm on jitm.translation_miss_log_id=tml.translation_miss_log_id\n\t\t\t\t\twhere jitm.job_id='" . addslashes($this->_rec->val('job_id')) . "'");
         // Misses grouped by the HTTP request they were logged for.
         $hrids = array();
         while ($row = mysql_fetch_assoc($res)) {
             $hrids[$row['http_request_log_id']][] = $row;
         //$site = $this->getSite();
         //$proxyWriter = $site->getProxyWriter();
         foreach ($hrids as $hrid => $tmlids) {
             $hrRecord = df_get_record('http_request_log', array('http_request_log_id' => '=' . $hrid));
             if (!$hrRecord) {
                 $ex = new Exception("Cannot add HTTP request to job because it could not be found");
                 $ex->http_request_log_id = $hrid;
                 throw $ex;
             // Use a page sucker to suck all of the resources used by this webpage.
             $pageSucker = new SweteJobPageSucker($this);
             $pageContent = $hrRecord->val('response_body');
             $pageUrl = $hrRecord->val('proxy_request_url');
             if (!$pageUrl) {
                 $ex = new Exception("Failed to add HTTP request to job because it did not have an associated proxy_request_url.");
                 // NOTE(review): the property is named http_request_log here but
                 // http_request_log_id on the exception above — confirm which is intended.
                 $ex->http_request_log = $hrid;
                 throw $ex;
             $pageUrl = $proxyWriter->unproxifyUrl($pageUrl);
             $pageContent = $pageSucker->processHtml($pageContent, $pageUrl)->save();
             $translatable = new Dataface_Record('job_translatable', array());
             $translatable->setValues(array('job_id' => $this->_rec->val('job_id'), 'content_type' => 'text/html', 'full_contents' => $pageContent, 'webpage_id' => null, 'source_url' => $hrRecord->val('request_url')));
             // Collect the missed strings; $tmid ends up as the translation
             // memory id of the last row. No de-duplication is done in this pass.
             $tmid = null;
             $strings = array();
             foreach ($tmlids as $tmlid) {
                 $strings[] = $tmlid['string'];
                 $tmid = $tmlid['translation_memory_id'];
             $translatable->setValue('translatable_contents', '<div>' . implode('</div><div>', $strings) . '</div>');
             //set the word count
             $pageWordCount = self::getPageWordCount($strings);
             $jobWordCount += $pageWordCount;
             $translatable->setValue('word_count', $pageWordCount);
             // Now we need to get the previous translations
             //$tmid = $webpage->getTranslationMemoryId(true);
             $tm = XFTranslationMemory::loadTranslationMemoryById($tmid);
             if (!$tm) {
                 throw new Exception("Could not find translation memory with id {$tmid}");
             $dict = $this->extractDictionaryFromHtml($tm, $pageContent);
             $translatable->setValue('previous_translations', serialize($dict));
             $res = $translatable->save();
             if (PEAR::isError($res)) {
                 throw new Exception($res->getMessage(), $res->getCode());
         // A job whose translatables contain no words is rejected.
         if ($jobWordCount == 0) {
             throw new Exception("The job has no translatable content.");
         $this->getRecord()->setValue('word_count', $jobWordCount);
         $this->getRecord()->setValue('compiled', 1);
         $res = $this->getRecord()->save();
         if (PEAR::isError($res)) {
             throw new Exception($res->getMessage());
     } catch (Exception $ex) {
         // The exception is rethrown unchanged; no other handling is visible here.
         throw $ex;