/**
 * Applies the translation memory to the current page and records the outcome.
 *
 * Steps, in order:
 *  - resets $this->translationStats, $this->translationMissLogRecord and
 *    $this->translatedPage;
 *  - resolves the translation memory (the explicitly assigned
 *    $this->translationMemory wins, otherwise the page's own id is used);
 *  - translates the page's `webpage_content` through the site's proxy writer;
 *  - rebuilds the `webpage_strings` rows for the page;
 *  - saves the translated content on the destination-language page record;
 *  - derives `webpage_status` from the hit/miss statistics;
 *  - optionally builds (and saves) `translation_miss_log` records;
 *  - optionally saves the source page record.
 *
 * Returns early, doing nothing, when the page content is blank or when no
 * translation memory can be resolved.
 *
 * @return void
 * @throws Exception If the translation memory's source language differs from
 *     the page language, or if saving the translated page or the source page
 *     fails.
 */
public function process()
{
    $this->translationStats = null;
    $this->translationMissLogRecord = null;
    // NOTE(review): this property is reset here but never assigned below (only
    // a local $translatedPage is populated) -- confirm whether callers expect it.
    $this->translatedPage = null;

    $proxyWriter = $this->site->getProxyWriter();
    $pageWrapper = $this->page;
    $page = $pageWrapper->getRecord();

    if (isset($this->translationMemory)) {
        $tmid = $this->translationMemory->getRecord()->val('translation_memory_id');
    } else {
        $tmid = $pageWrapper->getTranslationMemoryId(true);
    }

    $untranslatedContent = $page->val('webpage_content');
    if (!trim($untranslatedContent)) {
        // There is nothing to process on this page.
        return;
    }

    if (!$tmid and !$this->translationMemory) {
        return;
    }
    $tm = $this->translationMemory ? $this->translationMemory : $this->getTranslationMemory($tmid);
    if (!$tm) {
        return;
    }

    if ($tm->getSourceLanguage() != $pageWrapper->getLanguage()) {
        throw new Exception("Translation memory language does not match the record language. Translation memory source language is " . $tm->getSourceLanguage() . " but the page language is " . $pageWrapper->getLanguage() . '.');
    }

    $proxyWriter->setTranslationMemory($tm);
    $proxyWriter->setMinTranslationStatus($this->translateMinStatus);
    // $translationStats is filled in by reference.
    $translatedContent = $proxyWriter->translateHtml($untranslatedContent, $translationStats, $this->logTranslationMisses);
    $this->translationStats = $translationStats;

    $page->setValues(array(
        'last_translation_memory_applied' => date('Y-m-d H:i:s'),
        'last_translation_memory_misses' => $translationStats['misses'],
        'last_translation_memory_hits' => $translationStats['matches'],
    ));

    // Let's record the strings in this page: drop the old rows, then re-insert.
    df_q("delete from webpage_strings where webpage_id='" . addslashes($page->val('webpage_id')) . "'");
    if ($proxyWriter->lastStrings) {
        $wpid = $page->val('webpage_id');
        $tuples = array();
        foreach ($proxyWriter->lastStrings as $str) {
            $trimmed = trim($str);
            if (!$trimmed) {
                continue;
            }
            if (preg_match('/^[^\\w]+$/', $trimmed)) {
                // Skip strings made up only of non-word characters
                // (punctuation, symbols). Digits are word characters, so
                // purely numeric strings are NOT skipped by this check.
                continue;
            }
            $encStr = TMTools::encode($str, $params);
            $strRec = XFTranslationMemory::addString($encStr, $tm->getSourceLanguage());
            $tuples[] = '(' . $wpid . ',' . $strRec->val('string_id') . ')';
        }
        // Every string may have been filtered out above; an INSERT with an
        // empty VALUES list is invalid SQL, so only run it when there are rows.
        if ($tuples) {
            df_q("insert into webpage_strings (webpage_id,string_id) values " . implode(',', $tuples));
        }
    }

    $translatedPage = SweteWebpage::loadById($page->val('webpage_id'), $this->site->getDestinationLanguage());
    $translatedPage->getRecord()->setValue('webpage_content', $translatedContent);
    $res = $translatedPage->getRecord()->save();
    if (PEAR::isError($res)) {
        // Report the PEAR error itself, as the other save() checks do.
        throw new Exception($res->getMessage());
    }

    $lastApproved = $translatedPage->getLastVersionWithStatus(SweteWebpage::STATUS_APPROVED);
    if ($lastApproved and $lastApproved->val('webpage_content') == $translatedContent) {
        // Identical to the last approved version: still approved.
        $page->setValue('webpage_status', SweteWebpage::STATUS_APPROVED);
    } elseif ($translationStats['matches'] > 0 and $translationStats['misses'] == 0) {
        // We have perfect matches in what we are supposed to be translating.
        // We are either approving this page or marking it pending approval.
        if ($translatedPage->getAutoApprove(true)) {
            $page->setValue('webpage_status', SweteWebpage::STATUS_APPROVED);
            $translatedPage->setStatus(SweteWebpage::STATUS_APPROVED);
        } else {
            $page->setValue('webpage_status', SweteWebpage::STATUS_PENDING_APPROVAL);
        }
    } elseif ($translationStats['misses'] > 0) {
        $page->setValue('webpage_status', SweteWebpage::STATUS_CHANGED);
    } else {
        $page->setValue('webpage_status', null);
    }

    if ($this->logTranslationMisses and !empty($translationStats['log'])) {
        foreach ($translationStats['log'] as $str) {
            $nstr = TMTools::normalize($str);
            $estr = TMTools::encode($str, $junk);
            $hstr = md5($estr);

            $strRec = XFTranslationMemory::findString($estr, $this->site->getSourceLanguage());
            if (!$strRec) {
                $strRec = XFTranslationMemory::addString($estr, $this->site->getSourceLanguage());
            }

            $tlogEntry = new Dataface_Record('translation_miss_log', array());
            $tlogEntry->setValues(array(
                'string' => $str,
                'normalized_string' => $nstr,
                'encoded_string' => $estr,
                'string_hash' => $hstr,
                'date_inserted' => date('Y-m-d H:i:s'),
                'webpage_id' => $page->val('webpage_id'),
                'website_id' => $page->val('website_id'),
                'source_language' => $this->site->getSourceLanguage(),
                'destination_language' => $this->site->getDestinationLanguage(),
                'translation_memory_id' => $tmid,
                'string_id' => $strRec->val("string_id"),
            ));
            if (isset($this->webpageRefreshLogId)) {
                $tlogEntry->setValue('webpage_refresh_log_id', $this->webpageRefreshLogId);
            }
            if ($this->saveTranslationLogRecord) {
                // A failed save is deliberately ignored: it happens when the
                // entry is a duplicate, and duplicates are of no interest.
                $tlogEntry->save();
            }
            // Only the last entry built remains exposed after the loop.
            $this->translationMissLogRecord = $tlogEntry;
        }
    }

    if ($this->savePage) {
        $res = $page->save();
        if (PEAR::isError($res)) {
            throw new Exception($res->getMessage());
        }
    }
}
/**
 * @brief Handles an HTTP request. Processes the inputs and returns the
 * correct output.
 *
 * Flow, as implemented below:
 *  1. Runs the optional per-site delegate `init` hook.
 *  2. For non-POST requests that do not look like static assets, tries to
 *     serve a stored webpage version with status $this->pageStatus; on
 *     success the method returns right after flushing it.
 *  3. Otherwise obtains the source response (supplied input content, the
 *     live cache's client, or a fresh fetch).
 *  4. Passes non-HTML / non-CSS responses through untouched unless the
 *     delegate demands translation.
 *  5. For HTML: preprocesses, optionally live-translates (when the page
 *     profile enables it), then proxifies. For CSS: proxifies only.
 *  6. Emits headers and body, updates the live cache, saves the request log
 *     and, if enabled, writes `translation_miss_log` rows.
 *
 * @return void
 */
public function handleRequest()
{
    $this->mark("handleRequest: " . $this->URL);
    $url = $this->URL;
    $siteId = $this->site->getRecord()->val('website_id');

    // Optional per-site delegate hook.
    $delegatePath = 'sites/' . basename($siteId) . '/Delegate.php';
    if (file_exists($delegatePath)) {
        require_once $delegatePath;
        $clazz = 'sites_' . intval($siteId) . '_Delegate';
        if (class_exists($clazz) and method_exists($clazz, 'init')) {
            call_user_func(array($clazz, 'init'));
        }
    }

    $proxyWriter = $this->site->getProxyWriter();
    $proxyWriter->useHtml5Parser = $this->useHtml5Parser;
    $logger = $this->logger;
    $logger->proxyRequestUrl = $url;

    $isPost = strtolower($this->SERVER['REQUEST_METHOD']) === 'post';
    if ($isPost) {
        // We cannot cache post requests.
        // The cacher knows this, but let's make doubly sure.
        Dataface_Application::getInstance()->_conf['nocache'] = 1;
    }

    if (!$isPost and !preg_match('#\\.(ico|ICO|gif|GIF|jpg|JPG|jpeg|JPEG|SWF|swf|css|CSS|png|PNG|pdf|PDF|doc|DOC|svg|SVG|fla|FLA|zip|ZIP|js|JS)$#', $url)) {
        $logger->proxyRequestHeaders = serialize(apache_request_headers());
        $logger->proxyRequestPostVars = serialize($this->POST);
        $logger->proxyRequestMethod = $this->SERVER['REQUEST_METHOD'];
        $logger->settingsSiteId = $this->site->getRecord()->val('settings_site_id');

        $this->mark('Loading webpage');
        $page = $this->site->loadWebpageByProxifiedUrl($url);
        $this->mark('Webpage loaded');

        if ($page) {
            $logger->webpageId = $page->getRecord()->val('webpage_id');
            if (!$page->getRecord()->val('active')) {
                $logger->webpageNotUsedReason = 'Not active';
            } else {
                $this->mark('Loading latest version with status ' . $this->pageStatus);
                $version = $page->getLastVersionWithStatus($this->pageStatus, $this->site->getDestinationLanguage());
                $this->mark('Version loaded');
                if ($version) {
                    // A stored version exists: serve it and stop here.
                    $logger->webpageVersionId = $version->val('webpage_version_id');
                    $this->mark('Proxifying html');
                    $out = $this->site->getProxyWriter()->proxifyHtml($version->val('page_content'));
                    $this->mark('Finished proxifying html');
                    $logger->outputContent = $out;
                    $this->header('Content-Length: ' . strlen($out));
                    $this->header('Connection: close');
                    $this->header('Cache-Control: max-age=36000');
                    $this->header('X-SWeTE-Handler: ProxyServer/Static page/v' . $logger->webpageVersionId . '/' . __LINE__);
                    $this->output($out);
                    while (@ob_end_flush()) {
                    }
                    flush();
                    $this->mark('Flushed contents to browser');
                    $logger->outputResponseHeaders = serialize(headers_list());
                    $logger->save();
                    return;
                }
            }
        }
    }

    // No stored version was served, so we work from the source page instead.
    $this->mark('Getting the source page');
    if (@$this->inputContent) {
        // The content was provided.. we don't need to try to load it from
        // the source site
        $client = $this->createClientForInputContent();
    } elseif ($this->liveCache and $this->liveCache->client) {
        $client = $this->liveCache->client;
    } else {
        $client = $this->getSourcePage();
    }
    $this->mark('Source page loaded');

    $isHtml = preg_match('/html|xml/', $client->contentType);
    // JSON is detected by content type or by the body starting with { or [.
    $isJson = (preg_match('/json/', $client->contentType) or $client->content and ($client->content[0] == '{' or $client->content[0] == '['));
    $isCSS = preg_match('/css/', $client->contentType);

    $json = null;
    if ($isJson) {
        $json = json_decode($client->content, true);
        $html = isset($json) ? $proxyWriter->jsonToHtml($json) : null;
        if (isset($html)) {
            // From here on the JSON payload is processed as HTML and
            // converted back with htmlToJson() after proxification.
            $isHtml = true;
            $client->content = $html;
        } else {
            if (isset($json)) {
                $isHtml = false;
            }
            $isJson = false;
        }
    }

    $delegate = new ProxyClientPreprocessor($this->site->getRecord()->val('website_id'));
    $delegate->preprocessHeaders($client->headers);
    $headers = $proxyWriter->proxifyHeaders($client->headers, true);
    $locHeaders = preg_grep('/^Location:/i', $headers);

    // Let's see if this should be a passthru
    $translationMode = $delegate->getTranslationMode($client);
    if (!$isHtml and !$isCSS and $translationMode !== ProxyClient::TRANSLATION_MODE_TRANSLATE) {
        foreach ($headers as $h) {
            if (preg_match('/^Cache-control:(.*)$/i', $h, $matches)) {
                // We need to respect caching rules.
                // If this content is private then we cannot cache it.
                if (preg_match('/private|no-store|max-age=0|s-maxage=0/', $matches[1])) {
                    // The original statement here had no assignment and so
                    // did nothing; set the flag as the processed path does.
                    Dataface_Application::getInstance()->_conf['nocache'] = 1;
                }
            }
            $this->header($h);
        }
        $this->header('Content-Length: ' . strlen($client->content));
        $this->header('Connection: close');
        $this->header('X-SWeTE-Handler: ProxyServer Unprocessed/Non-HTML/Non-CSS/' . __LINE__);
        $this->output($client->content);
        if (!$this->buffer) {
            while (@ob_end_flush()) {
            }
            flush();
        }
        $this->mark('Flushed non-html content');
        return;
    }

    $stats = array();
    // Stays null unless live translation resolves a translation memory.
    $translation_memory_id = null;

    if ($isHtml and $translationMode !== ProxyClient::TRANSLATION_MODE_NOTRANSLATE and !$locHeaders) {
        $this->mark('Preprocessing page content');
        $client->content = $delegate->preprocess($client->content);
        $this->mark('Finished preprocessing');

        $logger->requestDate = date('Y-m-d H:i:s');
        $logger->proxyRequestUrl = $url;
        $logger->proxyRequestHeaders = serialize(apache_request_headers());
        $logger->proxyRequestPostVars = serialize($this->POST);
        $logger->proxyRequestMethod = $this->SERVER['REQUEST_METHOD'];
        $logger->websiteId = $this->site->getRecord()->val('website_id');

        $this->mark('Getting the profile for this page');
        $profile = $this->site->getProfile($proxyWriter->stripBasePath($url));
        $this->mark('Profile retrieved');

        if ($profile and $profile->val('enable_live_translation')) {
            if (isset($this->liveCache)) {
                $this->liveCache->live = true;
            }
            try {
                $translation_memory_id = $profile->val('translation_memory_id');
                if (!$translation_memory_id) {
                    throw new Exception("No translation memory is set up for this page: " . $url);
                }
                if (isset($this->liveCache)) {
                    $this->liveCache->translationMemoryId = $translation_memory_id;
                }
                require_once 'modules/tm/lib/XFTranslationMemory.php';
                // Hard-coded minimum status (the original notes this as
                // XFTranslationMemory::TRANSLATION_APPROVED).
                $minApprovalLevel = 3;
                $logger->liveTranslationEnabled = 1;
                $logger->liveTranslationMinStatus = $minApprovalLevel;

                $this->mark('Loading translation memory: ' . $translation_memory_id);
                $tm = XFTranslationMemory::loadTranslationMemoryById($translation_memory_id);
                $this->mark('Translation memory loaded');
                $logger->translationMemoryId = $translation_memory_id;

                $proxyWriter->setTranslationMemory($tm);
                $proxyWriter->setMinTranslationStatus($minApprovalLevel);
                $this->mark('Translating html');
                $client->content = $proxyWriter->translateHtml($client->content, $stats, $this->logTranslationMisses);
                $this->mark('Translation complete');
                $logger->liveTranslationHits = $stats['matches'];
                $logger->liveTranslationMisses = $stats['misses'];
            } catch (Exception $ex) {
                // Translation problems are logged; the untranslated page is
                // still proxified and served below.
                error_log($ex->getMessage());
            }
        }

        $this->mark('PROXIFY HTML START');
        $client->content = $proxyWriter->proxifyHtml($client->content);
        $this->mark('PROXIFY HTML END');
        if ($isJson) {
            $client->content = $proxyWriter->htmlToJson($json, $client->content);
        }
    } elseif ($isCSS) {
        if (isset($this->liveCache)) {
            $this->liveCache->live = true;
        }
        $logger->requestLoggingEnabled = false;
        $this->mark('PROXIFY CSS START');
        $client->content = $proxyWriter->proxifyCss($client->content);
        $this->mark('PROXIFY CSS END');
    } else {
        $logger->requestLoggingEnabled = false;
    }

    $cacheControlSet = false;
    foreach ($headers as $h) {
        if (preg_match('/^Cache-control:(.*)$/i', $h, $matches)) {
            // We need to respect caching rules.
            // If this content is private then we cannot cache it.
            $cacheControlSet = true;
            if (preg_match('/private|no-store|max-age=0|s-maxage=0/', $matches[1])) {
                Dataface_Application::getInstance()->_conf['nocache'] = 1;
            }
        }
        $this->header($h);
    }

    // Reduce to plain text BEFORE computing Content-Length so the header
    // matches the body that is actually sent.
    if ($this->inputContentType === 'text/plain') {
        $client->content = trim(strip_tags($client->content));
    }

    $this->header('Content-Length: ' . strlen($client->content));
    $this->header('X-SWeTE-Handler: ProxyServer Live/Processed/' . __LINE__);
    $this->header('Connection: close');

    // We won't add our own cache-control. We'll let the source site decide
    // this and send their own headers. The condition is kept (its body is
    // intentionally empty) because class_exists() may trigger autoloading.
    if (!$cacheControlSet and class_exists('Xataface_Scaler') and !@Dataface_Application::getInstance()->_conf['nocache']) {
        //$this->header('Cache-Control: max-age=3600');
    }

    $this->output($client->content);

    if (!$this->buffer) {
        while (@ob_end_flush()) {
        }
        flush();

        if (isset($this->liveCache)) {
            $this->mark('The live cache is enabled. Lets set the content');
            $siteRecord = $this->site->getRecord();
            $this->liveCache->siteId = $siteRecord->val('website_id');
            $this->liveCache->sourceLanguage = $this->site->getSourceLanguage();
            $this->liveCache->proxyLanguage = $this->site->getDestinationLanguage();
            $this->liveCache->proxyUrl = $this->site->getProxyUrl();
            $this->liveCache->siteUrl = $this->site->getSiteUrl();
            $this->liveCache->sourceDateLocale = $siteRecord->val('source_date_locale');
            $this->liveCache->targetDateLocale = $siteRecord->val('target_date_locale');
            if ($siteRecord->val('translation_parser_version')) {
                $this->liveCache->translationParserVersion = intval($siteRecord->val('translation_parser_version'));
            }
            $this->liveCache->content = $client->content;
            $this->liveCache->headers = headers_list();
            $this->liveCache->calculateExpires();
            $this->liveCache->skipLiveCache = $this->logTranslationMisses ? true : false;

            $this->mark('About to check if resource can be cached.');
            if ($this->liveCache->expires > time()) {
                $this->mark('Caching resource for live cache');
                $this->liveCache->save();
                if (!$this->liveCache->noServerCache) {
                    $this->liveCache->saveContent();
                }
                $this->mark('Finished cashing resource for live cache.');
            } else {
                if ($this->enableProfiling) {
                    // The original wrapped the expiry in date($expires), i.e.
                    // used the timestamp as the FORMAT string, which only
                    // echoed the digits back. Print the raw value directly so
                    // it is comparable with time().
                    $this->mark('Resource cannot be cached with live cache. Expiry is ' . $this->liveCache->expires . ' but now is ' . time() . '.');
                }
                $this->mark('Saving just the cache info entry');
                $this->liveCache->save();
            }
        }
    }

    $this->mark('Content flushed');
    $logger->outputContent = $client->content;
    $logger->outputResponseHeaders = serialize(headers_list());
    $logger->save();

    $this->mark('Loading the translation miss log');
    if ($this->logTranslationMisses and !empty($stats['log'])) {
        $this->mark('ITERATING TRANSLATION MISSES START (' . count($stats['log']) . ')');
        foreach ($stats['log'] as $str) {
            $nstr = TMTools::normalize($str);
            $trimStripped = trim(strip_tags($nstr));
            if (!$trimStripped) {
                continue;
            }
            // If the string is just a number or non-word we just skip it.
            if (preg_match('/^[0-9 \\.,%\\$#@\\(\\)\\!\\?\'":\\+=\\-\\/><]*$/', $trimStripped)) {
                continue;
            }

            $estr = TMTools::normalize(TMTools::encode($nstr, $junk));
            $strRec = XFTranslationMemory::addString($estr, $this->site->getSourceLanguage());
            $hstr = md5($estr);

            $tlogEntry = new Dataface_Record('translation_miss_log', array());
            $tlogEntry->setValues(array(
                'http_request_log_id' => $logger->getRecord()->val('http_request_log_id'),
                'string' => $str,
                'normalized_string' => $nstr,
                'encoded_string' => $estr,
                'string_hash' => $hstr,
                'date_inserted' => date('Y-m-d H:i:s'),
                'website_id' => $this->site->getRecord()->val('website_id'),
                'source_language' => $this->site->getSourceLanguage(),
                'destination_language' => $this->site->getDestinationLanguage(),
                'translation_memory_id' => $translation_memory_id,
                'string_id' => $strRec->val('string_id'),
            ));
            // A failed save is deliberately ignored: it happens when the
            // entry is a duplicate, and duplicates are of no interest.
            $tlogEntry->save();
        }
        $this->mark('ITERATING TRANSLATION MISSES END');
    }
}
/**
 * Imports strings (and optional translations) from the CSV file at
 * $this->inputFilePath, using $this->separator as the delimiter.
 *
 * The header row must contain `normalized_string` and
 * `normalized_translation_value`; `translation_memory_uuid` is additionally
 * required when $this->targetTranslationMemory is not set.
 *
 * For each data row:
 *  - the translation memory is $this->targetTranslationMemory, or else is
 *    looked up by the row's uuid; rows with no memory are counted as failed;
 *  - the string is added via XFTranslationMemory::addString();
 *  - if no `translation_miss_log` row exists yet for that string and memory,
 *    one is created (strings that are blank or purely numeric/punctuation
 *    after tag stripping are skipped entirely at this point and counted as
 *    neither succeeded nor failed);
 *  - a non-empty translation is stored with status TRANSLATION_APPROVED.
 *
 * Outcomes are accumulated in $this->succeeded, $this->failed and
 * $this->errors.
 *
 * @return void
 * @throws Exception If the file cannot be opened, has no header row, or is
 *     missing a required column.
 */
public function import()
{
    $fh = fopen($this->inputFilePath, 'r');
    if (!$fh) {
        throw new Exception(sprintf("Failed to open input file '%s'", $this->inputFilePath));
    }

    // try/catch + rethrow (rather than `finally`) keeps this compatible with
    // the old PHP versions the surrounding mysql_* code targets.
    try {
        $headerRow = fgetcsv($fh, 0, $this->separator);
        // fgetcsv() yields false at EOF and array(null) for a blank line.
        if (!is_array($headerRow) or $headerRow === array(null)) {
            throw new Exception(sprintf("Input file '%s' has no header row", $this->inputFilePath));
        }
        $headers = array_flip($headerRow);

        $required_fields = array('normalized_string', 'normalized_translation_value');
        if (!isset($this->targetTranslationMemory)) {
            $required_fields[] = 'translation_memory_uuid';
        }
        foreach ($required_fields as $f) {
            if (!array_key_exists($f, $headers)) {
                throw new Exception(sprintf("Missing required column heading: %s", $f));
            }
        }

        $stringIdx = $headers['normalized_string'];
        $translationIdx = $headers['normalized_translation_value'];
        // The uuid column is optional when a target memory is preset.
        $uuidIdx = array_key_exists('translation_memory_uuid', $headers) ? $headers['translation_memory_uuid'] : null;

        while (($row = fgetcsv($fh, 0, $this->separator)) !== false) {
            if ($row === array(null)) {
                // Blank line in the file: nothing to import.
                continue;
            }

            $string = isset($row[$stringIdx]) ? $row[$stringIdx] : null;
            $translation = isset($row[$translationIdx]) ? $row[$translationIdx] : null;
            $tmuuid = ($uuidIdx !== null and isset($row[$uuidIdx])) ? $row[$uuidIdx] : null;

            $translationMemory = $this->targetTranslationMemory;
            if (!isset($translationMemory)) {
                $translationMemory = XFTranslationMemory::loadTranslationMemoryByUuid($tmuuid);
            }
            if (!isset($translationMemory)) {
                $this->errors[] = array('row' => $row, 'message' => 'No translation memory assigned.');
                $this->failed++;
                continue;
            }

            $tmRecordId = $translationMemory->getRecord()->val('translation_memory_id');
            $strRec = XFTranslationMemory::addString($string, $translationMemory->getSourceLanguage());

            $res = df_q(sprintf("select string_id from translation_miss_log where string_id=%d and translation_memory_id=%d", $strRec->val('string_id'), $tmRecordId));
            $alreadyLogged = (mysql_num_rows($res) != 0);
            // Free the result on both paths (the original only did so when
            // the string was not yet logged).
            @mysql_free_result($res);

            if (!$alreadyLogged) {
                // This string is not in the translation miss log yet. We
                // will import it now.
                $nstr = TMTools::normalize($string);
                $trimStripped = trim(strip_tags($nstr));
                if (!$trimStripped) {
                    continue;
                }
                // If the string is just a number or non-word we just skip it.
                if (preg_match('/^[0-9 \\.,%\\$#@\\(\\)\\!\\?\'":\\+=\\-\\/><]*$/', $trimStripped)) {
                    continue;
                }

                $res = df_q(sprintf("select website_id from websites where translation_memory_id=%d", $tmRecordId));
                if (!$res) {
                    $this->failed++;
                    $this->errors[] = array('row' => $row, 'message' => sprintf("No website found for translation memory %d", $tmRecordId));
                    continue;
                }
                // No matching website leaves website_id null, as before.
                $websiteRow = mysql_fetch_row($res);
                $websiteId = $websiteRow ? $websiteRow[0] : null;
                @mysql_free_result($res);

                // The string is stored as-is: it is not re-encoded here.
                $tlogEntry = new Dataface_Record('translation_miss_log', array());
                $tlogEntry->setValues(array(
                    'http_request_log_id' => null,
                    'string' => $string,
                    'normalized_string' => $string,
                    'encoded_string' => $string,
                    'string_hash' => md5($string),
                    'date_inserted' => date('Y-m-d H:i:s'),
                    'website_id' => $websiteId,
                    'source_language' => $translationMemory->getSourceLanguage(),
                    'destination_language' => $translationMemory->getDestinationLanguage(),
                    'translation_memory_id' => $tmRecordId,
                    'string_id' => $strRec->val('string_id'),
                ));
                $res = $tlogEntry->save();
                if (PEAR::isError($res)) {
                    // Recorded as an error, but the row still proceeds to the
                    // translation step and is counted as succeeded.
                    $this->errors[] = array('row' => $row, 'message' => 'Failed to insert translation miss log entry: ' . $res->getMessage());
                }
            }

            // A translation of "0" is falsy and therefore not imported.
            if (trim((string) $translation)) {
                try {
                    $translationMemory->setTranslationStatus($string, $translation, XFTranslationMemory::TRANSLATION_APPROVED);
                } catch (Exception $ex) {
                    $this->failed++;
                    $this->errors[] = array('row' => $row, 'message' => 'Failed to set translation status: ' . $ex->getMessage());
                    continue;
                }
            }
            $this->succeeded++;
        }
    } catch (Exception $ex) {
        fclose($fh);
        throw $ex;
    }
    fclose($fh);
}
/**
 * Creates a webpage on $this->staticSite whose content is the given strings
 * (each wrapped in a <div>), plus one `translation_miss_log` row per string.
 *
 * @param string $url     Value stored as the page's `webpage_url`.
 * @param array  $strings Strings to register and place on the page.
 * @param string $user    Currently unused: `posted_by` is always 'test_user'.
 *                        NOTE(review): confirm whether this should be wired
 *                        through; its default differs from 'test_user'.
 * @param string $lang    Source language for the strings, the page record
 *                        and the miss-log rows. The destination language of
 *                        the miss-log rows is always 'fr'.
 * @return Dataface_Record The saved `webpages` record.
 * @throws Exception If saving the page or any miss-log row fails.
 */
function addWebpageForStaticSite($url, $strings, $user = '******', $lang = 'en')
{
    $siteRecord = $this->staticSite->getRecord();

    $content = "";
    $stringRecords = array();
    foreach ($strings as $string) {
        $strRec = XFTranslationMemory::addString($string, $lang);
        // Keyed by string id, so strings resolving to one id collapse.
        $stringRecords[$strRec->val('string_id')] = $string;
        $content .= '<div>' . $string . '</div>';
    }

    // Create a webpage for the strings.
    $pg = new Dataface_Record('webpages', array());
    // Previously hard-coded to 'en' even when $lang said otherwise.
    $pg->lang = $lang;
    $pg->setValues(array(
        'website_id' => $siteRecord->val('website_id'),
        'webpage_url' => $url,
        'webpage_content' => $content,
        'active' => 1,
        'posted_by' => 'test_user',
    ));
    $res = $pg->save();
    if (PEAR::isError($res)) {
        throw new Exception($res->getMessage());
    }
    $pgid = $pg->val('webpage_id');

    // Create some translation misses for the page.
    foreach ($stringRecords as $id => $string) {
        $estring = TMTools::encode($string, $params);
        $nstring = TMTools::normalize($estring);

        $tml = new Dataface_Record('translation_miss_log', array());
        $tml->setValues(array(
            'string' => $string,
            'normalized_string' => $nstring,
            'encoded_string' => $estring,
            'string_hash' => md5($estring),
            'translation_memory_id' => $siteRecord->val('translation_memory_id'),
            'webpage_id' => $pgid,
            'website_id' => $siteRecord->val('website_id'),
            'source_language' => $lang,
            'destination_language' => 'fr',
            'string_id' => $id,
        ));
        $res = $tml->save();
        if (PEAR::isError($res)) {
            throw new Exception($res->getMessage(), $res->getCode());
        }
    }

    return $pg;
}
/**
 * Compiles the job inputs into its final form so that it can be worked on. Before
 * this step the job just has a loose set of input webpages strings and translation
 * misses. This will grab all of the resources that it needs to be able to
 * present the job to a translator. This includes loading all resources for all
 * pages used into the data structure so that the job doesn't depend on outside
 * factors.
 *
 * One `job_translatable` record is written for:
 *  1. every webpage listed in `job_inputs_webpages`;
 *  2. every other webpage referenced by the job's translation misses;
 *  3. every HTTP request log entry referenced by the job's translation misses.
 *
 * On success the job record gets its `word_count`, is flagged `compiled` and
 * a "commit" is issued; on any exception a "rollback" is issued and the
 * exception is rethrown.
 *
 * @return void
 * @throws Exception If a webpage, HTTP request or translation memory cannot
 *     be found, a record fails to save, or the job has no translatable words.
 */
public function compile()
{
    require_once 'inc/SweteJobPageSucker.php';
    try {
        $jobId = $this->_rec->val('job_id');
        $quotedJobId = addslashes($jobId);

        // Translation misses that are attached to a webpage, grouped by page.
        $res = SweteDb::q(
            "select tml.webpage_id, tml.translation_miss_log_id, tml.string"
            . " from translation_miss_log tml"
            . " inner join job_inputs_translation_misses jitm on jitm.translation_miss_log_id=tml.translation_miss_log_id"
            . " where jitm.job_id='" . $quotedJobId . "' and"
            . " tml.webpage_id is not null"
        );
        $missedWebpageIds = array();
        while ($row = mysql_fetch_assoc($res)) {
            $missedWebpageIds[$row['webpage_id']][] = $row;
        }
        @mysql_free_result($res);

        // 1. Get all of the webpages
        $res = SweteDb::q("select webpage_id from job_inputs_webpages where job_id='" . $quotedJobId . "'");
        $wpids = array();
        while ($row = mysql_fetch_row($res)) {
            $wpids[] = $row[0];
        }
        @mysql_free_result($res);

        $site = $this->getSite();
        $proxyWriter = $site->getProxyWriter();
        $jobWordCount = 0;

        foreach ($wpids as $webpageId) {
            list($webpage, $translatable) = $this->compileStartWebpageTranslatable($site, $webpageId);

            // Strings from static sites
            $strings = array();
            $res = SweteDb::q("select `string` from job_inputs_webpages_strings where job_id='" . $quotedJobId . "' and webpage_id='" . addslashes($webpageId) . "'");
            while ($row = mysql_fetch_row($res)) {
                $strings[] = $row[0];
            }
            @mysql_free_result($res);

            // Lets see if there are any other strings that were added
            // individually to this page.
            if (isset($missedWebpageIds[$webpageId])) {
                foreach ($missedWebpageIds[$webpageId] as $row) {
                    $strings[] = $row['string'];
                }
                // Consumed here, so step "remainder" below will not redo it.
                unset($missedWebpageIds[$webpageId]);
            }

            $jobWordCount += $this->compileFinishTranslatable(
                $translatable,
                $this->compileUniqueStrings($strings, false),
                $webpage->getTranslationMemoryId(true),
                $webpage->getRecord()->val('webpage_content')
            );
        }

        // Add the remainder of the missed webpages.
        foreach ($missedWebpageIds as $webpageId => $missRows) {
            list($webpage, $translatable) = $this->compileStartWebpageTranslatable($site, $webpageId);

            $strings = array();
            foreach ($missRows as $missedstr) {
                $strings[] = $missedstr['string'];
            }

            $jobWordCount += $this->compileFinishTranslatable(
                $translatable,
                $this->compileUniqueStrings($strings, true),
                $webpage->getTranslationMemoryId(true),
                $webpage->getRecord()->val('webpage_content')
            );
        }

        // 2. Get all of the http requests without associated webpages.
        $res = SweteDb::q(
            "select htl.http_request_log_id, tml.translation_miss_log_id, tml.string, htl.translation_memory_id"
            . " from translation_miss_log tml"
            . " inner join http_request_log htl on tml.http_request_log_id=htl.http_request_log_id"
            . " inner join job_inputs_translation_misses jitm on jitm.translation_miss_log_id=tml.translation_miss_log_id"
            . " where jitm.job_id='" . $quotedJobId . "'"
        );
        $hrids = array();
        while ($row = mysql_fetch_assoc($res)) {
            $hrids[$row['http_request_log_id']][] = $row;
        }
        @mysql_free_result($res);

        foreach ($hrids as $hrid => $tmlids) {
            $hrRecord = df_get_record('http_request_log', array('http_request_log_id' => '=' . $hrid));
            if (!$hrRecord) {
                $ex = new Exception("Cannot add HTTP request to job because it could not be found");
                $ex->http_request_log_id = $hrid;
                throw $ex;
            }

            $pageUrl = $hrRecord->val('proxy_request_url');
            if (!$pageUrl) {
                $ex = new Exception("Failed to add HTTP request to job because it did not have an associated proxy_request_url.");
                // Expose the id under the same name as the exception above;
                // the older `http_request_log` name is kept for callers that
                // already read it.
                $ex->http_request_log_id = $hrid;
                $ex->http_request_log = $hrid;
                throw $ex;
            }
            $pageUrl = $proxyWriter->unproxifyUrl($pageUrl);

            // Use a page sucker to suck all of the resources used by this
            // response.
            $pageSucker = new SweteJobPageSucker($this);
            $pageContent = $pageSucker->processHtml($hrRecord->val('response_body'), $pageUrl)->save();

            $translatable = new Dataface_Record('job_translatable', array());
            $translatable->setValues(array(
                'job_id' => $jobId,
                'content_type' => 'text/html',
                'full_contents' => $pageContent,
                'webpage_id' => null,
                'source_url' => $hrRecord->val('request_url'),
            ));

            // Strings are not de-duplicated for HTTP requests; the memory id
            // of the last miss row wins.
            $tmid = null;
            $strings = array();
            foreach ($tmlids as $tmlid) {
                $strings[] = $tmlid['string'];
                $tmid = $tmlid['translation_memory_id'];
            }

            $jobWordCount += $this->compileFinishTranslatable($translatable, $strings, $tmid, $pageContent);
        }

        if ($jobWordCount == 0) {
            throw new Exception("The job has no translatable content.");
        }

        $jobRecord = $this->getRecord();
        $jobRecord->setValue('word_count', $jobWordCount);
        $jobRecord->setValue('compiled', 1);
        $res = $jobRecord->save();
        if (PEAR::isError($res)) {
            throw new Exception($res->getMessage());
        }
        SweteDb::q("commit");
    } catch (Exception $ex) {
        SweteDb::q("rollback");
        throw $ex;
    }
}

/**
 * Loads a webpage in the job's source language, pulls in its resources with
 * a SweteJobPageSucker and prepares (without saving) its `job_translatable`.
 *
 * @param mixed $site      Site object as returned by $this->getSite().
 * @param mixed $webpageId Id of the webpage to load.
 * @return array array($webpage, $translatable)
 * @throws Exception If the webpage cannot be loaded.
 */
private function compileStartWebpageTranslatable($site, $webpageId)
{
    $webpage = SweteWebpage::loadById($webpageId, $this->_rec->val('source_language'));
    if (!$webpage) {
        throw new Exception("Could not find webpage with id {$webpageId}");
    }
    $webpage->setSite($site);

    // Use a page sucker to suck all of the resources used by this webpage.
    $pageSucker = new SweteJobPageSucker($this);
    $pageUrl = $site->getSiteUrl() . $webpage->getRecord()->val('webpage_url');
    $pageContent = $pageSucker->processHtml($webpage->getRecord()->val('webpage_content'), $pageUrl);

    $translatable = new Dataface_Record('job_translatable', array());
    $translatable->setValues(array(
        'job_id' => $this->_rec->val('job_id'),
        'content_type' => $webpage->getRecord()->val('last_checked_content_type'),
        'full_contents' => $pageContent->save(),
        'webpage_id' => $webpageId,
        'source_url' => $pageUrl,
    ));

    return array($webpage, $translatable);
}

/**
 * Collapses duplicate strings, keeping the first occurrence of each.
 *
 * Two strings are duplicates when their keys match; the key is the encoded
 * form of the normalized string.
 *
 * @param array $strings            Raw strings.
 * @param bool  $renormalizeEncoded When true the encoded key is normalized
 *     once more. NOTE(review): the two webpage loops of compile() have always
 *     disagreed on this step; the flag preserves each loop's behavior --
 *     confirm which form is intended and unify.
 * @return array The unique strings, in first-seen order.
 */
private function compileUniqueStrings(array $strings, $renormalizeEncoded)
{
    $seen = array();
    $unique = array();
    foreach ($strings as $str) {
        $key = TMTools::encode(TMTools::normalize($str), $temp);
        if ($renormalizeEncoded) {
            $key = TMTools::normalize($key);
        }
        if (!isset($seen[$key])) {
            $seen[$key] = 1;
            $unique[] = $str;
        }
    }
    return $unique;
}

/**
 * Fills in the string-dependent fields of a `job_translatable` record
 * (translatable contents, word count, previous translations) and saves it.
 *
 * @param Dataface_Record $translatable Record prepared by the caller.
 * @param array           $strings      Strings to present to the translator.
 * @param mixed           $tmid         Translation memory id to load.
 * @param string          $html         HTML from which the dictionary of
 *                                      previous translations is extracted.
 * @return int The word count computed for $strings.
 * @throws Exception If the translation memory cannot be loaded or the
 *     record fails to save.
 */
private function compileFinishTranslatable($translatable, array $strings, $tmid, $html)
{
    $translatable->setValue('translatable_contents', '<div>' . implode('</div><div>', $strings) . '</div>');

    // Set the word count
    $pageWordCount = self::getPageWordCount($strings);
    $translatable->setValue('word_count', $pageWordCount);

    // Now we need to get the previous translations
    $tm = XFTranslationMemory::loadTranslationMemoryById($tmid);
    if (!$tm) {
        throw new Exception("Could not find translation memory with id {$tmid}");
    }
    $dict = $this->extractDictionaryFromHtml($tm, $html);
    $translatable->setValue('previous_translations', serialize($dict));

    $res = $translatable->save();
    if (PEAR::isError($res)) {
        throw new Exception($res->getMessage(), $res->getCode());
    }
    return $pageWordCount;
}