/**
 * Loop through all the post phrases and echo them as a JSON document.
 *
 * For every distinct phrase found in the post (title, content, feed content,
 * excerpt and - optionally - permalink parts) the list of languages that
 * still lack a translation is collected. The emitted JSON has the shape:
 *   posttitle - the filtered post title (or "tags" for the fake tags post)
 *   langs     - every language that is missing at least one translation
 *   p         - map of phrase => array('l' => languages, 't' => token)
 *   length    - number of entries in "p" (only present when "p" is not empty)
 *
 * Nothing is emitted when the current user may not edit the post or when the
 * post cannot be loaded.
 *
 * @param int $postID post to inspect; -555 is a fake post id meaning "tags"
 */
function get_post_phrases($postID) {
    // fake post for tags
    if ($postID == -555) {
        $phrases = $this->get_tags();
        $title = "tags";
    } else {
        // Some security, to avoid others from seeing private posts
        if (!current_user_can('edit_post', $postID)) {
            return;
        }
        global $post;
        // this is needed because some of the functions below expect it...
        $post = get_post($postID);
        // get_post() yields null for an unknown id, nothing to report then
        if (!$post) {
            return;
        }

        // Display filters
        $title = apply_filters('the_title', $post->post_title);
        $content = apply_filters('the_content', $post->post_content);
        $the_content_feed = apply_filters('the_content_feed', $content);
        $excerpt = apply_filters('get_the_excerpt', $post->post_excerpt);
        $excerpt_rss = apply_filters('the_excerpt_rss', $excerpt);

        //TODO - get comments text

        $parser = new parser();
        // Merge the phrases of every source into a single list for traversing
        $phrases = array_merge(
                $parser->get_phrases_list($content),
                $parser->get_phrases_list($title),
                $parser->get_phrases_list($the_content_feed),
                $parser->get_phrases_list($excerpt),
                $parser->get_phrases_list($excerpt_rss)
        );
        tp_logger($phrases, 4);

        // Add phrases from permalink
        if ($this->transposh->options->enable_url_translate) {
            $permalink = get_permalink($postID);
            $permalink = substr($permalink, strlen($this->transposh->home_url) + 1);
            foreach (explode('/', $permalink) as $part) {
                if (!$part || is_numeric($part)) {
                    continue;
                }
                $phrases[] = urldecode(str_replace('-', ' ', $part));
            }
        }
    }

    // The set of languages worth asking for does not depend on the phrase,
    // so it is computed once instead of once per phrase.
    $target_langs = array();
    foreach (explode(',', $this->transposh->options->viewable_languages) as $lang) {
        // if this isn't the default language or we specifically allow default language translation, we will seek this out...
        // as we don't normally want to auto-translate the default language
        if ($this->transposh->options->is_default_language($lang) && !$this->transposh->options->enable_default_translate) {
            continue;
        }
        // There is no point in returning phrases, languages pairs that cannot be translated
        if (in_array($lang, transposh_consts::$bing_languages) || in_array($lang, transposh_consts::$google_languages) || in_array($lang, transposh_consts::$apertium_languages)) {
            $target_langs[] = $lang;
        }
    }

    $json = array();
    // We provide the post title here
    $json['posttitle'] = $title;
    // and all languages we might want to target
    $json['langs'] = array();

    // The same phrase normally shows up in several sources (content and feed
    // content for example); handling it more than once would repeat its
    // languages and inflate 'length', so every phrase is processed only once.
    $seen = array();
    foreach ($phrases as $key) {
        if (isset($seen[$key])) {
            continue;
        }
        $seen[$key] = true;

        $missing_langs = array();
        foreach ($target_langs as $lang) {
            list($source, $translation) = $this->transposh->database->fetch_translation($key, $lang);
            if (!$translation) {
                $missing_langs[] = $lang;
                if (!in_array($lang, $json['langs'])) {
                    $json['langs'][] = $lang;
                }
            }
        }

        // only if a languages list was created we'll need to translate this
        if ($missing_langs) {
            // p stands for phrases, l stands for languages, t is token
            $json['p'][$key] = array(
                'l' => $missing_langs,
                't' => transposh_utils::base64_url_encode($key),
            );
            if (!isset($json['length'])) {
                $json['length'] = 0;
            }
            $json['length']++;
        }
    }

    // the header helps with debugging
    header("Content-type: text/javascript");
    echo json_encode($json);
}
/**
 * Main function - actually translates a given HTML
 *
 * Parses the markup, tags the translatable phrases, rewrites urls through
 * url_rewrite_func and replaces every phrase for which fetch_translate_func
 * yields a translation. In edit / auto translate mode edit spans are added as
 * well. When might_json is set and the input is a JSON object carrying a
 * "contents" member, only that member is translated and the JSON is
 * re-encoded.
 *
 * @param string $string containing HTML
 * @return string Translated content is here (note: when no fetch function is
 *                set the parsed dom object itself is returned, as before)
 */
function fix_html($string) {
    // ready our stats
    $this->stats = new parserstats();

    // handler for possible json (buddypress)
    // (an empty string has no offset 0, so it is excluded up front)
    if ($this->might_json && is_string($string) && $string !== '' && $string[0] == '{') {
        $jsoner = json_decode($string);
        if ($jsoner != null) {
            tp_logger("json detected (buddypress?)", 4);
            // currently we only handle contents (which buddypress heavily use)
            if (isset($jsoner->contents) && $jsoner->contents) {
                $jsoner->contents = $this->fix_html($jsoner->contents);
                return json_encode($jsoner);
            }
        }
    }

    // create our dom
    $string = str_replace(chr(0xc2) . chr(0xa0), ' ', $string); // annoying NBSPs?
    $this->html = str_get_html($string);
    // NOTE(review): str_get_html() may yield false in some simple_html_dom
    // releases (e.g. empty input) - nothing can be translated in that case
    if (!$this->html) {
        return $string;
    }

    // mark translateable elements
    // Document defined lang may be preset to correct lang, but should be ignored TODO: Better?
    $html_tag = $this->html->find('html', 0);
    if ($html_tag) {
        $html_tag->lang = '';
    }
    $this->translate_tagging($this->html->root);

    // first fix the html tag itself - we might need to to the same for all such attributes with flipping
    $html_tag = $this->html->find('html', 0);
    if ($html_tag) {
        $html_tag->dir = $this->dir_rtl ? 'rtl' : 'ltr';
    }

    if ($this->lang) {
        if ($html_tag) {
            $html_tag->lang = $this->lang;
        }
        // add support for <meta name="language" content="<lang>">
        // find() without an index returns an array, so every match is updated
        foreach ($this->html->find('meta[name=language]') as $meta) {
            $meta->content = $this->lang;
        }
    }

    // not much point in further processing if we don't have a function that does it
    if ($this->fetch_translate_func == null) {
        return $this->html;
    }

    // fix feed
    if ($this->feed_fix) {
        // fix urls on feed
        tp_logger('fixing rss feed', 3);
        foreach (array('link', 'wfw:commentrss', 'comments') as $tag) {
            foreach ($this->html->find($tag) as $e) {
                $e->innertext = htmlspecialchars(call_user_func_array($this->url_rewrite_func, array($e->innertext)));
                // no need to translate anything here
                unset($e->nodes);
            }
        }
        // guid is not really a url -- in some future, we can check if permalink is true and probably falsify it
        foreach ($this->html->find('guid') as $e) {
            $e->innertext = $e->innertext . '-' . $this->lang;
            unset($e->nodes);
        }
        // fix feed language (the element may be missing from the feed)
        $feed_language = $this->html->find('language', 0);
        if ($feed_language) {
            $feed_language->innertext = $this->lang;
            unset($feed_language->nodes);
        }
    } else {
        // since this is not a feed, we might have references to such in the <link rel="alternate">
        foreach ($this->html->find('link') as $e) {
            if (strcasecmp($e->rel, 'alternate') == 0 || strcasecmp($e->rel, 'canonical') == 0) {
                $e->href = call_user_func_array($this->url_rewrite_func, array($e->href));
            }
        }
    }

    // try some prefetching... (//todo - maybe move directly to the phrase create)
    $originals = array();
    if ($this->prefetch_translate_func != null) {
        foreach ($this->html->find('text') as $e) {
            foreach ($e->nodes as $ep) {
                if ($ep->phrase) {
                    $originals[$ep->phrase] = true;
                }
            }
        }
        foreach (array('title', 'value') as $title) {
            foreach ($this->html->find('[' . $title . ']') as $e) {
                if (isset($e->nodes)) {
                    foreach ($e->nodes as $ep) {
                        if ($ep->phrase) {
                            $originals[$ep->phrase] = true;
                        }
                    }
                }
            }
        }
        foreach ($this->html->find('[content]') as $e) {
            foreach ($e->nodes as $ep) {
                if ($ep->phrase) {
                    $originals[$ep->phrase] = true;
                }
            }
        }
        // if we should split, we will split some urls for translation prefetching
        if ($this->split_url_func != null) {
            foreach ($this->atags as $e) {
                foreach (call_user_func_array($this->split_url_func, array($e->href)) as $part) {
                    $originals[$part] = true;
                }
            }
            foreach ($this->otags as $e) {
                foreach (call_user_func_array($this->split_url_func, array($e->value)) as $part) {
                    $originals[$part] = true;
                }
            }
        }
        call_user_func_array($this->prefetch_translate_func, array($originals, $this->lang));
    }

    // TODO (work in progress): create edit spans for the translatable url parts of the a tags

    // fix urls...
    foreach ($this->atags as $e) {
        if ($e->href) {
            $e->href = call_user_func_array($this->url_rewrite_func, array($e->href));
        }
    }
    foreach ($this->otags as $e) {
        if ($e->value) {
            $e->value = call_user_func_array($this->url_rewrite_func, array($e->value));
        }
    }

    // this is used to reserve spans we cannot add directly (out of body, metas, etc)
    $hiddenspans = '';
    $savedspan = '';

    // actually translate tags
    // texts are first
    foreach ($this->html->find('text') as $e) {
        $replace = array();
        foreach ($e->nodes as $ep) {
            list($source, $translated_text) = call_user_func_array($this->fetch_translate_func, array($ep->phrase, $this->lang));
            //stats
            $this->stats->total_phrases++;
            if ($translated_text) {
                $this->stats->translated_phrases++;
                if ($source == 0) {
                    $this->stats->human_translated_phrases++;
                }
            }
            if ($this->is_edit_mode || ($this->is_auto_translate && $translated_text == null)) {
                if ($ep->inselect) {
                    $savedspan .= $this->create_edit_span($ep->phrase, $translated_text, $source, true, $ep->srclang);
                } elseif (!$ep->inbody) {
                    $hiddenspans .= $this->create_edit_span($ep->phrase, $translated_text, $source, true, $ep->srclang);
                } else {
                    $translated_text = $this->create_edit_span($ep->phrase, $translated_text, $source, false, $ep->srclang);
                }
            }
            // store replacements
            if ($translated_text) {
                $replace[] = array($translated_text, $ep);
            }
        }
        // do replacements in reverse, so earlier offsets stay valid
        foreach (array_reverse($replace) as $epag) {
            list($replacetext, $epg) = $epag;
            $e->outertext = substr_replace($e->outertext, $replacetext, $epg->start, $epg->len);
        }

        // this adds saved spans to the first not in select element which is in the body
        // ($ep is the last phrase node of this text element here)
        if ($e->nodes && !$ep->inselect && $savedspan && $ep->inbody) { // (TODO: might not be...?)
            $e->outertext = $savedspan . $e->outertext;
            $savedspan = '';
        }
    }

    // now we handle the title attributes (and the value of submit buttons)
    $hidden_phrases = array();
    foreach (array('title', 'value') as $title) {
        foreach ($this->html->find('[' . $title . ']') as $e) {
            $replace = array();
            $span = '';
            // when we already have a parent outertext we'll have to update it directly
            $parent_has_outer = isset($e->parent->_[HDOM_INFO_OUTER]);
            $saved_outertext = $parent_has_outer ? $e->outertext : '';
            tp_logger($title . '-original: ' . $e->$title, 4);
            if (isset($e->nodes)) {
                foreach ($e->nodes as $ep) {
                    if ($ep->tag != 'phrase') {
                        continue;
                    }
                    list($source, $translated_text) = call_user_func_array($this->fetch_translate_func, array($ep->phrase, $this->lang));
                    // more stats
                    $this->stats->total_phrases++;
                    if ($ep->inbody) {
                        $this->stats->hidden_phrases++;
                    } else {
                        $this->stats->meta_phrases++;
                    }
                    if ($translated_text) {
                        $this->stats->translated_phrases++;
                        if ($ep->inbody) {
                            $this->stats->hidden_translated_phrases++;
                        } else {
                            $this->stats->meta_translated_phrases++;
                        }
                        if ($source == 0) {
                            $this->stats->human_translated_phrases++;
                        }
                    }
                    if (($this->is_edit_mode || ($this->is_auto_translate && $translated_text == null)) && $ep->inbody) {
                        // prevent duplicate translation (title = text)
                        if (strpos($e->innertext, transposh_utils::base64_url_encode($ep->phrase)) === false) {
                            //no need to translate span the same hidden phrase more than once
                            if (!in_array($ep->phrase, $hidden_phrases)) {
                                $this->stats->hidden_translateable_phrases++;
                                $span .= $this->create_edit_span($ep->phrase, $translated_text, $source, true, $ep->srclang);
                                $hidden_phrases[] = $ep->phrase;
                            }
                        }
                    }
                    // if we need to replace, we store this (as a pair, so two
                    // phrases sharing one translation are both kept)
                    if ($translated_text) {
                        $replace[] = array($translated_text, $ep);
                    }
                }
            }
            // and later replace - in reverse, on the attribute being handled
            foreach (array_reverse($replace) as $epag) {
                list($replacetext, $epg) = $epag;
                $e->$title = substr_replace($e->$title, $replacetext, $epg->start, $epg->len);
            }

            $e->outertext .= $span;
            // this is where we update in the outercase issue
            if ($parent_has_outer) {
                $e->parent->outertext = implode($e->outertext, explode($saved_outertext, $e->parent->outertext, 2));
            }
        }
    }

    // now we handle the meta content - which is simpler because they can't be edited or auto-translated in place
    // we also don't expect any father modifications here
    // so we now add all those spans right before the <body> tag end
    foreach ($this->html->find('[content]') as $e) {
        $right = '';
        $newtext = '';
        foreach ($e->nodes as $ep) {
            if ($ep->tag != 'phrase') {
                continue;
            }
            // even more stats
            $this->stats->total_phrases++;
            $this->stats->meta_phrases++;
            list($source, $translated_text) = call_user_func_array($this->fetch_translate_func, array($ep->phrase, $this->lang));
            if ($translated_text) {
                $this->stats->translated_phrases++;
                $this->stats->meta_translated_phrases++;
                if ($source == 0) {
                    $this->stats->human_translated_phrases++;
                }
                // the content is consumed left to right; a phrase that cannot be
                // located in what is left is skipped instead of mangling the value
                $pieces = ($ep->phrase != '') ? explode($ep->phrase, $e->content, 2) : array();
                if (count($pieces) == 2) {
                    list($left, $right) = $pieces;
                    $newtext .= $left . $translated_text;
                    $e->content = $right;
                }
            }
            if ($this->is_edit_mode) {
                $hiddenspans .= $this->create_edit_span($ep->phrase, $translated_text, $source, true, $ep->srclang);
            }
            if (!$translated_text && $this->is_auto_translate && !$this->is_edit_mode) {
                // TODO: FIX - untranslated metas are only logged for now
                tp_logger('untranslated meta for ' . $ep->phrase . ' ' . $this->lang);
            }
        }
        if ($newtext) {
            $e->content = $newtext . $right;
            tp_logger("content-phrase: {$newtext}", 4);
        }
    }

    if ($hiddenspans) {
        $body = $this->html->find('body', 0);
        $body_last = ($body != null) ? $body->lastChild() : null;
        if ($body_last != null) {
            $body_last->outertext .= $hiddenspans;
        }
    }

    // This adds a meta tag with our statistics json-encoded inside...
    $this->stats->stop_timing();
    $head = $this->html->find('head', 0);
    // lastChild() is null for a head without child elements
    $head_last = ($head != null) ? $head->lastChild() : null;
    if ($head_last != null) {
        if ($this->edit_span_created) {
            $head_last->outertext .= $this->added_header;
        }
        $head_last->outertext .= "\n<meta name=\"translation-stats\" content='" . json_encode($this->stats) . "'/>";
    }

    // we make sure that the result is clear from our shananigans
    // (outertext is returned, because of places where tostring failed)
    return str_replace(array(TP_GTXT_BRK, TP_GTXT_IBRK, TP_GTXT_BRK_CLOSER, TP_GTXT_IBRK_CLOSER), '', $this->html->outertext);
}
/**
 * Ajax proxy for google translate.
 *
 * Reads the target language (tl), the optional source language (sl) and the
 * list of phrases (q[]) from the query string. Phrases that already have a
 * stored translation are answered from the database, the rest are fetched
 * with curl. The answer is echoed as {"results": [...]} in the order of the
 * request, and the phrases that were not cached are handed to
 * update_translation() through $_POST. The request always ends with die.
 */
function on_ajax_nopriv_tp_gp() {
    // we need curl for this proxy
    if (!function_exists('curl_init')) {
        return;
    }
    transposh_utils::allow_cors();

    // target language
    $tl = (isset($_GET['tl']) && is_string($_GET['tl'])) ? $_GET['tl'] : '';
    // we want to avoid unneeded work or dos attacks on languages we don't support
    if (!in_array($tl, transposh_consts::$google_languages) || !$this->options->is_active_language($tl)) {
        return;
    }

    // source language
    $sl = 'auto';
    if (isset($_GET['sl']) && is_string($_GET['sl'])) {
        $sl = $_GET['sl'];
    }

    // the phrases - anything which is not a string is dropped, and the list
    // is reindexed so positions match the item count used below
    $phrases = array();
    if (isset($_GET['q']) && is_array($_GET['q'])) {
        $phrases = array_values(array_filter($_GET['q'], 'is_string'));
    }
    // item count
    $i = count($phrases);

    // holds cached results (by position) and the query for the missing ones
    $r = array();
    $q = '';
    foreach ($phrases as $j => $p) {
        list($source, $trans) = $this->database->fetch_translation($p, $tl);
        if (!$trans) {
            $q .= '&q=' . urlencode(stripslashes($p));
        } else {
            $r[$j] = $trans;
        }
    }

    // we avoid curling we had all results prehand
    $jsonarr = null;
    if ($q) {
        // both languages are encoded, sl is taken as is from the request
        $url = 'http://translate.google.com/translate_a/t?client=a' . $q . '&tl=' . urlencode($tl) . '&sl=' . urlencode($sl);
        tp_logger($url, 5);
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        //must set agent for google to respond with utf-8
        curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0');
        $output = curl_exec($ch);
        if ($output === false) {
            echo 'Curl error: ' . curl_error($ch);
            curl_close($ch);
            die;
        }
        curl_close($ch);
        tp_logger($output, 5);

        $jsonarr = json_decode($output);
        if (!$jsonarr) {
            tp_logger("google didn't return JSON, lets try to recover", 4);
            // empty array members (",,") are what breaks the decoding
            $recovered = json_decode(str_replace(',,', ',"",', $output));
            // If there is still no JSON
            if (!is_array($recovered) || !isset($recovered[0]) || !is_array($recovered[0])) {
                echo 'Not JSON';
                die;
            }
            $jsonarr = new stdClass();
            $jsonarr->results = array();
            foreach ($recovered[0] as $result) {
                // a missing value keeps its slot, so positions stay aligned
                $jsonarr->results[] = isset($result[0][0][0]) ? $result[0][0][0] : null;
            }
        } else {
            // a single result is not wrapped in a results list
            if (!isset($jsonarr->results)) {
                $wrapper = new stdClass();
                $wrapper->results = array($jsonarr);
                $jsonarr = $wrapper;
            }
            // drop the parts of the answer we have no use for
            foreach ($jsonarr->results as $result) {
                if (!is_object($result)) {
                    continue;
                }
                if (isset($result->sentences[0]) && is_object($result->sentences[0])) {
                    unset($result->sentences[0]->orig);
                    unset($result->sentences[0]->translit);
                    unset($result->sentences[0]->src_translit);
                }
                unset($result->src);
                unset($result->server_time);
            }
        }
    }

    header('Content-type: text/html; charset=utf-8');

    // here we match online results with cached ones
    $jsonout = new stdClass();
    $jsonout->results = array();
    $k = 0;
    for ($j = 0; $j < $i; $j++) {
        if (isset($r[$j])) {
            $jsonout->results[] = $r[$j];
            continue;
        }
        if (isset($jsonarr->results[$k]->sentences[0]->trans)) {
            $jsonout->results[] = $jsonarr->results[$k]->sentences[0]->trans;
        } elseif (isset($jsonarr->results[$k]) && $jsonarr->results[$k]) {
            $jsonout->results[] = $jsonarr->results[$k];
        } else {
            // nothing usable came back, the phrase is returned as it was sent
            $jsonout->results[] = $phrases[$j];
        }
        $k++;
    }
    echo json_encode($jsonout);

    // do the db dance - a bit hackish way to insert downloaded translations directly to the db without having
    // to pass through the user and collect $200
    if ($k) {
        $_POST['items'] = $k;
        $_POST['ln0'] = $tl;
        $_POST['sr0'] = 1; // google, hmm hmm,
        $k = 0;
        for ($j = 0; $j < $i; $j++) {
            if (!isset($r[$j])) {
                $_POST["tk{$k}"] = transposh_utils::base64_url_encode(stripslashes($phrases[$j])); // stupid, but should work
                $_POST["tr{$k}"] = $jsonout->results[$j];
                $k++;
            }
        }
        $this->database->update_translation();
    }
    die;
}