function handle($params)
 {
     // Release the session lock so other requests from this session can
     // proceed while we render, and close the connection when done.
     session_write_close();
     header('Connection: close');
     $app = Dataface_Application::getInstance();
     $query = $app->getQuery();
     if (!@$query['-job_translatable_id']) {
         throw new Exception("No translatable id specified");
     }
     $translatable = df_get_record('job_translatable', array('job_translatable_id' => '=' . $query['-job_translatable_id']));
     if (!$translatable) {
         throw new Exception("Translatable could not be found.");
     }
     $job = df_get_record('jobs', array('job_id' => '=' . $translatable->val('job_id')));
     if (!$job) {
         throw new Exception("Job could not be loaded.");
     }
     if (!$job->checkPermission('preview job')) {
         // Fix: the original emitted the self-contradictory status line
         // "HTTP/1.0 401 Forbidden".  403 Forbidden is the correct status
         // for a permission failure (401 means "unauthenticated").
         header('HTTP/1.0 403 Forbidden');
         exit;
     }
     require_once 'inc/SweteJob.class.php';
     require_once 'inc/SweteJobPageSucker.php';
     $jobO = new SweteJob($job);
     $pageSucker = new SweteJobPageSucker($jobO);
     // Which rendition to preview: the untranslated source ("source"), the
     // page with previously-saved translations applied ("previous"), or the
     // newly-translated page ("new").  Defaults to "source".
     $translation = "source";
     if (@$query['-translation']) {
         $translation = $query['-translation'];
     }
     // Validate up front so we don't do any rendering work for a bad request.
     if (!in_array($translation, array("source", "previous", "new"), true)) {
         throw new Exception("Invalid translation parameter " . $translation);
     }
     // All three modes share the same first step: rewrite resource URLs in
     // the stored page contents so they are served back through the
     // swete_job_serve_content action (previously duplicated in each branch).
     $resourceBase = DATAFACE_SITE_HREF . '?-action=swete_job_serve_content&job_id=' . $job->val('job_id') . '&url_hash=';
     $output = $pageSucker->renderHtml($translatable->val('full_contents'), $resourceBase);
     if ($translation == "previous") {
         $output = $jobO->translatePreviousHtml($output, unserialize($job->val('previous_translations')));
     } else if ($translation == "new") {
         $output = $jobO->translateHtml($output, unserialize($job->val('previous_translations')));
     }
     header('Content-Length: ' . strlen($output));
     header('Content-type: text/html; charset="UTF-8"');
     echo $output;
 }
 function handle($params)
 {
     // Release the session lock early and close the connection when done so
     // serving static job resources doesn't block other session requests.
     session_write_close();
     header('Connection: close');
     $app = Dataface_Application::getInstance();
     $query = $app->getQuery();
     if (!@$query['job_id']) {
         throw new Exception("NO job id specified");
     }
     if (!@$query['url_hash']) {
         throw new Exception("No URL Hash specified");
     }
     $job = df_get_record('jobs', array('job_id' => '=' . $query['job_id']));
     if (!$job) {
         throw new Exception("Job could not be found.");
     }
     // Fix: authorize before loading the resource so unauthorized callers
     // cannot probe which url hashes exist (the original checked afterward),
     // and send a valid status line -- the original emitted the malformed
     // "HTTP/1.0 400 Permission denied"; 403 Forbidden is correct here.
     if (!$job->checkPermission('preview job')) {
         header('HTTP/1.0 403 Forbidden');
         exit;
     }
     require_once 'inc/SweteJob.class.php';
     require_once 'inc/SweteJobPageSucker.php';
     $jobO = new SweteJob($job);
     $pageSucker = new SweteJobPageSucker($jobO);
     $resource = $pageSucker->loadResource($query['url_hash']);
     if (!$resource) {
         header('HTTP/1.0 404 Not Found');
         exit;
     }
     // NOTE(review): string-built SQL guarded only by addslashes(); prefer a
     // parameterized query if the framework supports it.
     $res = df_q("select * from job_content where job_content_id='" . addslashes($resource->val('job_content_id')) . "' limit 1");
     $content = mysql_fetch_object($res);
     if (!$content) {
         // Fix: the original dereferenced $content->content without checking
         // the fetch result; a resource row with a missing content record
         // would have raised a notice and served an empty body as a 200.
         header('HTTP/1.0 404 Not Found');
         exit;
     }
     $output = $content->content;
     if (preg_match('#css#', $content->content_type)) {
         // CSS needs its url(...) references rewritten to route back through
         // this action so nested resources come from the job snapshot.
         $output = $pageSucker->renderCss($output, DATAFACE_SITE_HREF . '?-action=swete_job_serve_content&job_id=' . $query['job_id'] . '&url_hash=');
     }
     header('Content-Length: ' . strlen($output));
     header('Content-Type: ' . $content->content_type);
     echo $output;
     flush();
 }
Beispiel #3 (Example #3)
 /**
  * Compiles the job inputs into their final form so that the job can be
  * worked on.  Before this step the job just has a loose set of input
  * webpage ids, per-page strings, and translation misses.  Compiling grabs
  * all of the resources needed to present the job to a translator --
  * including loading every resource used by every page into the job's own
  * data structures -- so that the compiled job doesn't depend on outside
  * factors.
  *
  * Three sources of translatable content are processed, in order:
  *   1. webpages explicitly added to the job (job_inputs_webpages), merged
  *      with any translation misses logged against those same pages;
  *   2. translation misses tied to a webpage that was NOT explicitly added;
  *   3. translation misses tied only to a raw http_request_log entry.
  *
  * Side effects: inserts one job_translatable record per page/request, sets
  * the job record's word_count and compiled flag, and issues "commit"; on
  * any exception the transaction is rolled back and the exception rethrown.
  *
  * NOTE(review): no "begin" is issued here -- presumably the caller (or
  * SweteDb) opens the transaction that "commit"/"rollback" refer to; confirm.
  *
  * @throws Exception if a referenced webpage, translation memory, or HTTP
  *         request cannot be loaded, if a save fails, or if the compiled
  *         job ends up with no translatable content.
  */
 public function compile()
 {
     require_once 'inc/SweteJobPageSucker.php';
     try {
         // Build a map of webpage_id => list of translation-miss rows so
         // page-specific misses can be merged into each page's string set.
         $res = SweteDb::q("select tml.webpage_id, tml.translation_miss_log_id, tml.string\n\t\t\t\tfrom \n\t\t\t\t\ttranslation_miss_log tml \n\t\t\t\t\tinner join job_inputs_translation_misses jitm on jitm.translation_miss_log_id=tml.translation_miss_log_id\n\t\t\t\t\twhere jitm.job_id='" . addslashes($this->_rec->val('job_id')) . "' and\n\t\t\t\t\ttml.webpage_id is not null");
         $missedWebpageIds = array();
         while ($row = mysql_fetch_assoc($res)) {
             $missedWebpageIds[$row['webpage_id']][] = $row;
         }
         @mysql_free_result($res);
         // 1. Get all of the webpages explicitly added to this job.
         $res = SweteDb::q("select webpage_id from job_inputs_webpages where job_id='" . addslashes($this->_rec->val('job_id')) . "'");
         $wpids = array();
         while ($row = mysql_fetch_row($res)) {
             $wpids[] = $row[0];
         }
         $site = $this->getSite();
         $proxyWriter = $site->getProxyWriter();
         // Running total of words across all translatables in the job.
         $jobWordCount = 0;
         @mysql_free_result($res);
         foreach ($wpids as $webpageId) {
             $webpage = SweteWebpage::loadById($webpageId, $this->_rec->val('source_language'));
             if (!$webpage) {
                 throw new Exception("Could not find webpage with id {$webpageId}");
             }
             $webpage->setSite($site);
             // Use a page sucker to suck all of the resources used by this webpage.
             $pageSucker = new SweteJobPageSucker($this);
             $pageContent = $webpage->getRecord()->val('webpage_content');
             $pageUrl = $site->getSiteUrl() . $webpage->getRecord()->val('webpage_url');
             $pageContent = $pageSucker->processHtml($pageContent, $pageUrl);
             // Snapshot the processed page into a new job_translatable row.
             $translatable = new Dataface_Record('job_translatable', array());
             $translatable->setValues(array('job_id' => $this->_rec->val('job_id'), 'content_type' => $webpage->getRecord()->val('last_checked_content_type'), 'full_contents' => $pageContent->save(), 'webpage_id' => $webpageId, 'source_url' => $pageUrl));
             // Strings registered against this page for static sites.
             $strings = array();
             $res = SweteDb::q("select `string` from job_inputs_webpages_strings where job_id='" . addslashes($this->_rec->val('job_id')) . "' and webpage_id='" . addslashes($webpageId) . "'");
             while ($row = mysql_fetch_row($res)) {
                 $strings[] = $row[0];
             }
             @mysql_free_result($res);
             // Merge in translation misses logged against this page, and
             // remove the entry so the second pass below skips this page.
             if (isset($missedWebpageIds[$webpageId])) {
                 foreach ($missedWebpageIds[$webpageId] as $row) {
                     $strings[] = $row['string'];
                 }
                 unset($missedWebpageIds[$webpageId]);
             }
             // Collapse duplicate strings, keyed on the normalized+encoded
             // form so superficially different strings dedupe together.
             $uniqueStringIndex = array();
             $uniqueStrings = array();
             foreach ($strings as $k => $str) {
                 $nstr = TMTools::normalize($str);
                 // NOTE(review): $temp is never initialized in this method;
                 // TMTools::encode() presumably fills it by reference with
                 // the extracted variables -- confirm.
                 $estr = TMTools::encode($nstr, $temp);
                 if (!isset($uniqueStringIndex[$estr])) {
                     $uniqueStrings[] = $str;
                     $uniqueStringIndex[$estr] = 1;
                 }
             }
             $strings = $uniqueStrings;
             $translatable->setValue('translatable_contents', '<div>' . implode('</div><div>', $strings) . '</div>');
             // Set the per-page word count and accumulate the job total.
             $pageWordCount = self::getPageWordCount($strings);
             $jobWordCount += $pageWordCount;
             $translatable->setValue('word_count', $pageWordCount);
             // Now we need to get the previous translations for this page.
             $tmid = $webpage->getTranslationMemoryId(true);
             $tm = XFTranslationMemory::loadTranslationMemoryById($tmid);
             if (!$tm) {
                 throw new Exception("Could not find translation memory with id {$tmid}");
             }
             $dict = $this->extractDictionaryFromHtml($tm, $webpage->getRecord()->val('webpage_content'));
             $translatable->setValue('previous_translations', serialize($dict));
             $res = $translatable->save();
             if (PEAR::isError($res)) {
                 throw new Exception($res->getMessage(), $res->getCode());
             }
         }
         // Add the remainder of the missed webpages -- pages that have
         // logged translation misses but were not explicitly added above.
         foreach ($missedWebpageIds as $webpageId => $strings) {
             $webpage = SweteWebpage::loadById($webpageId, $this->_rec->val('source_language'));
             if (!$webpage) {
                 throw new Exception("Could not find webpage with id {$webpageId}");
             }
             $webpage->setSite($site);
             // Use a page sucker to suck all of the resources used by this webpage.
             $pageSucker = new SweteJobPageSucker($this);
             $pageContent = $webpage->getRecord()->val('webpage_content');
             $pageUrl = $site->getSiteUrl() . $webpage->getRecord()->val('webpage_url');
             $pageContent = $pageSucker->processHtml($pageContent, $pageUrl);
             $translatable = new Dataface_Record('job_translatable', array());
             $translatable->setValues(array('job_id' => $this->_rec->val('job_id'), 'content_type' => $webpage->getRecord()->val('last_checked_content_type'), 'full_contents' => $pageContent->save(), 'webpage_id' => $webpageId, 'source_url' => $pageUrl));
             // Collapse duplicate strings.  Here $strings holds miss-log rows
             // (arrays), not bare strings as in the loop above.
             $uniqueStringIndex = array();
             $uniqueStrings = array();
             foreach ($strings as $k => $missedstr) {
                 $str = $missedstr['string'];
                 $nstr = TMTools::normalize($str);
                 // NOTE(review): unlike the first loop, the dedup key here is
                 // wrapped in an extra TMTools::normalize() -- the two loops
                 // use different keys; confirm which form is intended.
                 $estr = TMTools::normalize(TMTools::encode($nstr, $temp));
                 if (!isset($uniqueStringIndex[$estr])) {
                     $uniqueStrings[] = $str;
                     $uniqueStringIndex[$estr] = 1;
                 }
             }
             $strings = $uniqueStrings;
             $translatable->setValue('translatable_contents', '<div>' . implode('</div><div>', $strings) . '</div>');
             // Set the per-page word count and accumulate the job total.
             $pageWordCount = self::getPageWordCount($strings);
             //strings
             $jobWordCount += $pageWordCount;
             $translatable->setValue('word_count', $pageWordCount);
             // Now we need to get the previous translations for this page.
             $tmid = $webpage->getTranslationMemoryId(true);
             $tm = XFTranslationMemory::loadTranslationMemoryById($tmid);
             if (!$tm) {
                 throw new Exception("Could not find translation memory with id {$tmid}");
             }
             $dict = $this->extractDictionaryFromHtml($tm, $webpage->getRecord()->val('webpage_content'));
             $translatable->setValue('previous_translations', serialize($dict));
             $res = $translatable->save();
             if (PEAR::isError($res)) {
                 throw new Exception($res->getMessage(), $res->getCode());
             }
         }
         // 2. Get all of the http requests without associated webpages.
         // Group the miss rows by request id: http_request_log_id => rows.
         $res = SweteDb::q("select htl.http_request_log_id, tml.translation_miss_log_id, tml.string, htl.translation_memory_id\n\t\t\t\tfrom \n\t\t\t\t\ttranslation_miss_log tml \n\t\t\t\t\tinner join http_request_log htl on tml.http_request_log_id=htl.http_request_log_id \n\t\t\t\t\tinner join job_inputs_translation_misses jitm on jitm.translation_miss_log_id=tml.translation_miss_log_id\n\t\t\t\t\twhere jitm.job_id='" . addslashes($this->_rec->val('job_id')) . "'");
         $hrids = array();
         while ($row = mysql_fetch_assoc($res)) {
             $hrids[$row['http_request_log_id']][] = $row;
         }
         //$site = $this->getSite();
         //$proxyWriter = $site->getProxyWriter();
         @mysql_free_result($res);
         foreach ($hrids as $hrid => $tmlids) {
             $hrRecord = df_get_record('http_request_log', array('http_request_log_id' => '=' . $hrid));
             if (!$hrRecord) {
                 $ex = new Exception("Cannot add HTTP request to job because it could not be found");
                 $ex->http_request_log_id = $hrid;
                 throw $ex;
             }
             // Use a page sucker to suck all of the resources used by this webpage.
             $pageSucker = new SweteJobPageSucker($this);
             $pageContent = $hrRecord->val('response_body');
             $pageUrl = $hrRecord->val('proxy_request_url');
             if (!$pageUrl) {
                 $ex = new Exception("Failed to add HTTP request to job because it did not have an associated proxy_request_url.");
                 $ex->http_request_log = $hrid;
                 throw $ex;
             }
             // Convert the proxied URL back to the source-site URL before
             // processing so relative resources resolve correctly.
             $pageUrl = $proxyWriter->unproxifyUrl($pageUrl);
             $pageContent = $pageSucker->processHtml($pageContent, $pageUrl)->save();
             $translatable = new Dataface_Record('job_translatable', array());
             $translatable->setValues(array('job_id' => $this->_rec->val('job_id'), 'content_type' => 'text/html', 'full_contents' => $pageContent, 'webpage_id' => null, 'source_url' => $hrRecord->val('request_url')));
             // No dedup pass here; strings come straight from the miss rows.
             // $tmid ends up as the translation_memory_id of the last row
             // (all rows for one request share the same request log entry).
             $tmid = null;
             $strings = array();
             foreach ($tmlids as $tmlid) {
                 $strings[] = $tmlid['string'];
                 $tmid = $tmlid['translation_memory_id'];
             }
             $translatable->setValue('translatable_contents', '<div>' . implode('</div><div>', $strings) . '</div>');
             // Set the per-page word count and accumulate the job total.
             $pageWordCount = self::getPageWordCount($strings);
             $jobWordCount += $pageWordCount;
             $translatable->setValue('word_count', $pageWordCount);
             // Now we need to get the previous translations.
             //$tmid = $webpage->getTranslationMemoryId(true);
             $tm = XFTranslationMemory::loadTranslationMemoryById($tmid);
             if (!$tm) {
                 throw new Exception("Could not find translation memory with id {$tmid}");
             }
             $dict = $this->extractDictionaryFromHtml($tm, $pageContent);
             $translatable->setValue('previous_translations', serialize($dict));
             $res = $translatable->save();
             if (PEAR::isError($res)) {
                 throw new Exception($res->getMessage(), $res->getCode());
             }
         }
         // An empty job is treated as an error so it never reaches a
         // translator; the rollback below undoes any rows inserted above.
         if ($jobWordCount == 0) {
             throw new Exception("The job has no translatable content.");
         }
         $this->getRecord()->setValue('word_count', $jobWordCount);
         $this->getRecord()->setValue('compiled', 1);
         $res = $this->getRecord()->save();
         if (PEAR::isError($res)) {
             throw new Exception($res->getMessage());
         }
         SweteDb::q("commit");
     } catch (Exception $ex) {
         // Undo all partial inserts/updates, then let the caller see the error.
         SweteDb::q("rollback");
         throw $ex;
     }
 }