function handle($params) {
    session_write_close();
    header('Connection: close');
    $app = Dataface_Application::getInstance();
    $query = $app->getQuery();

    if (!@$query['-job_translatable_id']) {
        throw new Exception("No translatable id specified");
    }

    $translatable = df_get_record('job_translatable', array('job_translatable_id' => '=' . $query['-job_translatable_id']));
    if (!$translatable) {
        throw new Exception("Translatable could not be found.");
    }

    $job = df_get_record('jobs', array('job_id' => '=' . $translatable->val('job_id')));
    if (!$job) {
        throw new Exception("Job could not be loaded.");
    }

    if (!$job->checkPermission('preview job')) {
        // The current user lacks the 'preview job' permission.
        header('HTTP/1.0 403 Forbidden');
        exit;
    }

    require_once 'inc/SweteJob.class.php';
    require_once 'inc/SweteJobPageSucker.php';
    $jobO = new SweteJob($job);
    $pageSucker = new SweteJobPageSucker($jobO);

    // The -translation parameter selects the preview mode: "source" (default),
    // "previous" (apply the previously stored translations), or "new" (apply new translations).
    $translation = "source";
    if (@$query['-translation']) {
        $translation = $query['-translation'];
    }

    // Every mode renders the stored page contents with resource URLs rewritten to
    // point back at the swete_job_serve_content action.
    $resourceBaseUrl = DATAFACE_SITE_HREF . '?-action=swete_job_serve_content&job_id=' . $job->val('job_id') . '&url_hash=';
    if ($translation == "source") {
        $output = $translatable->val('full_contents');
        $output = $pageSucker->renderHtml($output, $resourceBaseUrl);
        //$output = $jobO->translateHtml($output, unserialize($job->val('previous_translations')));
    } else if ($translation == "previous") {
        $output = $translatable->val('full_contents');
        $output = $pageSucker->renderHtml($output, $resourceBaseUrl);
        $output = $jobO->translatePreviousHtml($output, unserialize($job->val('previous_translations')));
    } else if ($translation == "new") {
        $output = $translatable->val('full_contents');
        $output = $pageSucker->renderHtml($output, $resourceBaseUrl);
        $output = $jobO->translateHtml($output, unserialize($job->val('previous_translations')));
    } else {
        throw new Exception("Invalid translation parameter " . $translation);
    }

    header('Content-Length: ' . strlen($output));
    header('Content-Type: text/html; charset=UTF-8');
    echo $output;
}
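// Usage sketch (an assumption inferred from the query parameters read above; the
// action name for this preview handler is not given in the source, so it is shown
// as a placeholder):
//
//   index.php?-action=<preview_action_name>&-job_translatable_id=123&-translation=new
//
// -translation may be "source", "previous", or "new"; it defaults to "source".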
function handle($params) {
    session_write_close();
    header('Connection: close');
    $app = Dataface_Application::getInstance();
    $query = $app->getQuery();

    if (!@$query['job_id']) {
        throw new Exception("No job id specified");
    }
    if (!@$query['url_hash']) {
        throw new Exception("No URL hash specified");
    }

    $job = df_get_record('jobs', array('job_id' => '=' . $query['job_id']));
    if (!$job) {
        throw new Exception("Job could not be found.");
    }

    require_once 'inc/SweteJob.class.php';
    require_once 'inc/SweteJobPageSucker.php';
    $jobO = new SweteJob($job);
    $pageSucker = new SweteJobPageSucker($jobO);

    // Look up the cached resource (e.g. CSS, images) that was captured for this job.
    $resource = $pageSucker->loadResource($query['url_hash']);
    if (!$resource) {
        header('HTTP/1.0 404 Not Found');
        exit;
    }

    if (!$job->checkPermission('preview job')) {
        // The current user lacks the 'preview job' permission.
        header('HTTP/1.0 403 Forbidden');
        exit;
    }

    $res = df_q("select * from job_content where job_content_id='" . addslashes($resource->val('job_content_id')) . "' limit 1");
    $content = mysql_fetch_object($res);
    $output = $content->content;

    // CSS may itself reference further resources, so rewrite its URLs to point back
    // at this action as well.
    if (preg_match('#css#', $content->content_type)) {
        $output = $pageSucker->renderCss($output, DATAFACE_SITE_HREF . '?-action=swete_job_serve_content&job_id=' . $query['job_id'] . '&url_hash=');
    }

    header('Content-Length: ' . strlen($output));
    header('Content-Type: ' . $content->content_type);
    echo $output;
    flush();
}
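// Usage sketch (an assumption, based on the resource base URL built by the preview
// handler above): requests for cached page resources arrive as
//
//   index.php?-action=swete_job_serve_content&job_id=42&url_hash=<hash>
//
// The url_hash identifies a job_content row captured by SweteJobPageSucker, and the
// handler echoes that cached resource back with its original content type.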
/**
 * Compiles the job inputs into their final form so that the job can be worked on.
 * Before this step the job just has a loose set of input webpages, strings, and
 * translation misses.  This grabs everything needed to present the job to a
 * translator, including all resources for every page used, and stores them in the
 * job's own data structures so that the job doesn't depend on outside factors.
 */
public function compile() {
    require_once 'inc/SweteJobPageSucker.php';
    try {
        // Collect the translation misses that are tied to a specific webpage, grouped by webpage id.
        $res = SweteDb::q(
            "select tml.webpage_id, tml.translation_miss_log_id, tml.string
            from translation_miss_log tml
            inner join job_inputs_translation_misses jitm
                on jitm.translation_miss_log_id = tml.translation_miss_log_id
            where jitm.job_id = '" . addslashes($this->_rec->val('job_id')) . "'
                and tml.webpage_id is not null"
        );
        $missedWebpageIds = array();
        while ($row = mysql_fetch_assoc($res)) {
            $missedWebpageIds[$row['webpage_id']][] = $row;
        }
        @mysql_free_result($res);

        // 1. Get all of the webpages that were added to the job as inputs.
        $res = SweteDb::q("select webpage_id from job_inputs_webpages where job_id='" . addslashes($this->_rec->val('job_id')) . "'");
        $wpids = array();
        while ($row = mysql_fetch_row($res)) {
            $wpids[] = $row[0];
        }
        $site = $this->getSite();
        $proxyWriter = $site->getProxyWriter();
        $jobWordCount = 0;
        @mysql_free_result($res);

        foreach ($wpids as $webpageId) {
            $webpage = SweteWebpage::loadById($webpageId, $this->_rec->val('source_language'));
            if (!$webpage) {
                throw new Exception("Could not find webpage with id {$webpageId}");
            }
            $webpage->setSite($site);

            // Use a page sucker to pull in all of the resources used by this webpage.
            $pageSucker = new SweteJobPageSucker($this);
            $pageContent = $webpage->getRecord()->val('webpage_content');
            $pageUrl = $site->getSiteUrl() . $webpage->getRecord()->val('webpage_url');
            $pageContent = $pageSucker->processHtml($pageContent, $pageUrl);

            $translatable = new Dataface_Record('job_translatable', array());
            $translatable->setValues(array(
                'job_id' => $this->_rec->val('job_id'),
                'content_type' => $webpage->getRecord()->val('last_checked_content_type'),
                'full_contents' => $pageContent->save(),
                'webpage_id' => $webpageId,
                'source_url' => $pageUrl
            ));

            // Strings from static sites.
            $strings = array();
            $res = SweteDb::q("select `string` from job_inputs_webpages_strings where job_id='" . addslashes($this->_rec->val('job_id')) . "' and webpage_id='" . addslashes($webpageId) . "'");
            while ($row = mysql_fetch_row($res)) {
                $strings[] = $row[0];
            }
            @mysql_free_result($res);

            // See if there are any other strings that were added individually to this page.
            if (isset($missedWebpageIds[$webpageId])) {
                foreach ($missedWebpageIds[$webpageId] as $row) {
                    $strings[] = $row['string'];
                }
                unset($missedWebpageIds[$webpageId]);
            }

            // We need to collapse duplicate strings.
            $uniqueStringIndex = array();
            $uniqueStrings = array();
            foreach ($strings as $k => $str) {
                $nstr = TMTools::normalize($str);
                $estr = TMTools::encode($nstr, $temp);
                if (!isset($uniqueStringIndex[$estr])) {
                    $uniqueStrings[] = $str;
                    $uniqueStringIndex[$estr] = 1;
                }
            }
            $strings = $uniqueStrings;
            $translatable->setValue('translatable_contents', '<div>' . implode('</div><div>', $strings) . '</div>');

            // Set the word count.
            $pageWordCount = self::getPageWordCount($strings);
            $jobWordCount += $pageWordCount;
            $translatable->setValue('word_count', $pageWordCount);

            // Now we need to get the previous translations.
            $tmid = $webpage->getTranslationMemoryId(true);
            $tm = XFTranslationMemory::loadTranslationMemoryById($tmid);
            if (!$tm) {
                throw new Exception("Could not find translation memory with id {$tmid}");
            }
            $dict = $this->extractDictionaryFromHtml($tm, $webpage->getRecord()->val('webpage_content'));
            $translatable->setValue('previous_translations', serialize($dict));
            $res = $translatable->save();
            if (PEAR::isError($res)) {
                throw new Exception($res->getMessage(), $res->getCode());
            }
        }

        // Add the remainder of the missed webpages (pages that only have individually
        // logged translation misses and were not job input webpages above).
        foreach ($missedWebpageIds as $webpageId => $strings) {
            $webpage = SweteWebpage::loadById($webpageId, $this->_rec->val('source_language'));
            if (!$webpage) {
                throw new Exception("Could not find webpage with id {$webpageId}");
            }
            $webpage->setSite($site);

            // Use a page sucker to pull in all of the resources used by this webpage.
            $pageSucker = new SweteJobPageSucker($this);
            $pageContent = $webpage->getRecord()->val('webpage_content');
            $pageUrl = $site->getSiteUrl() . $webpage->getRecord()->val('webpage_url');
            $pageContent = $pageSucker->processHtml($pageContent, $pageUrl);

            $translatable = new Dataface_Record('job_translatable', array());
            $translatable->setValues(array(
                'job_id' => $this->_rec->val('job_id'),
                'content_type' => $webpage->getRecord()->val('last_checked_content_type'),
                'full_contents' => $pageContent->save(),
                'webpage_id' => $webpageId,
                'source_url' => $pageUrl
            ));

            // We need to collapse duplicate strings.
            $uniqueStringIndex = array();
            $uniqueStrings = array();
            foreach ($strings as $k => $missedstr) {
                $str = $missedstr['string'];
                $nstr = TMTools::normalize($str);
                $estr = TMTools::normalize(TMTools::encode($nstr, $temp));
                if (!isset($uniqueStringIndex[$estr])) {
                    $uniqueStrings[] = $str;
                    $uniqueStringIndex[$estr] = 1;
                }
            }
            $strings = $uniqueStrings;
            $translatable->setValue('translatable_contents', '<div>' . implode('</div><div>', $strings) . '</div>');

            // Set the word count.
            $pageWordCount = self::getPageWordCount($strings);
            $jobWordCount += $pageWordCount;
            $translatable->setValue('word_count', $pageWordCount);

            // Now we need to get the previous translations.
            $tmid = $webpage->getTranslationMemoryId(true);
            $tm = XFTranslationMemory::loadTranslationMemoryById($tmid);
            if (!$tm) {
                throw new Exception("Could not find translation memory with id {$tmid}");
            }
            $dict = $this->extractDictionaryFromHtml($tm, $webpage->getRecord()->val('webpage_content'));
            $translatable->setValue('previous_translations', serialize($dict));
            $res = $translatable->save();
            if (PEAR::isError($res)) {
                throw new Exception($res->getMessage(), $res->getCode());
            }
        }

        // 2. Get all of the http requests without associated webpages.
        $res = SweteDb::q(
            "select htl.http_request_log_id, tml.translation_miss_log_id, tml.string, htl.translation_memory_id
            from translation_miss_log tml
            inner join http_request_log htl
                on tml.http_request_log_id = htl.http_request_log_id
            inner join job_inputs_translation_misses jitm
                on jitm.translation_miss_log_id = tml.translation_miss_log_id
            where jitm.job_id = '" . addslashes($this->_rec->val('job_id')) . "'"
        );
        $hrids = array();
        while ($row = mysql_fetch_assoc($res)) {
            $hrids[$row['http_request_log_id']][] = $row;
        }
        //$site = $this->getSite();
        //$proxyWriter = $site->getProxyWriter();
        @mysql_free_result($res);

        foreach ($hrids as $hrid => $tmlids) {
            $hrRecord = df_get_record('http_request_log', array('http_request_log_id' => '=' . $hrid));
            if (!$hrRecord) {
                $ex = new Exception("Cannot add HTTP request to job because it could not be found");
                $ex->http_request_log_id = $hrid;
                throw $ex;
            }

            // Use a page sucker to pull in all of the resources used by this page.
            $pageSucker = new SweteJobPageSucker($this);
            $pageContent = $hrRecord->val('response_body');
            $pageUrl = $hrRecord->val('proxy_request_url');
            if (!$pageUrl) {
                $ex = new Exception("Failed to add HTTP request to job because it did not have an associated proxy_request_url.");
                $ex->http_request_log = $hrid;
                throw $ex;
            }
            $pageUrl = $proxyWriter->unproxifyUrl($pageUrl);
            $pageContent = $pageSucker->processHtml($pageContent, $pageUrl)->save();

            $translatable = new Dataface_Record('job_translatable', array());
            $translatable->setValues(array(
                'job_id' => $this->_rec->val('job_id'),
                'content_type' => 'text/html',
                'full_contents' => $pageContent,
                'webpage_id' => null,
                'source_url' => $hrRecord->val('request_url')
            ));

            $tmid = null;
            $strings = array();
            foreach ($tmlids as $tmlid) {
                $strings[] = $tmlid['string'];
                $tmid = $tmlid['translation_memory_id'];
            }
            $translatable->setValue('translatable_contents', '<div>' . implode('</div><div>', $strings) . '</div>');

            // Set the word count.
            $pageWordCount = self::getPageWordCount($strings);
            $jobWordCount += $pageWordCount;
            $translatable->setValue('word_count', $pageWordCount);

            // Now we need to get the previous translations.
            //$tmid = $webpage->getTranslationMemoryId(true);
            $tm = XFTranslationMemory::loadTranslationMemoryById($tmid);
            if (!$tm) {
                throw new Exception("Could not find translation memory with id {$tmid}");
            }
            $dict = $this->extractDictionaryFromHtml($tm, $pageContent);
            $translatable->setValue('previous_translations', serialize($dict));
            $res = $translatable->save();
            if (PEAR::isError($res)) {
                throw new Exception($res->getMessage(), $res->getCode());
            }
        }

        if ($jobWordCount == 0) {
            throw new Exception("The job has no translatable content.");
        }
        $this->getRecord()->setValue('word_count', $jobWordCount);
        $this->getRecord()->setValue('compiled', 1);
        $res = $this->getRecord()->save();
        if (PEAR::isError($res)) {
            throw new Exception($res->getMessage());
        }
        SweteDb::q("commit");
    } catch (Exception $ex) {
        SweteDb::q("rollback");
        throw $ex;
    }
}