/**
 * Converts a list of source-language diff sentences into target-language sentences.
 *
 * Entries equal to "*deleted*" are discarded first. The remaining entries are
 * consumed from the front of the list: when the alignment object knows a
 * translation for the entry (anything other than the string "NULL" in slot 1 of
 * its answer), the aligned target text is segmented and appended to the result;
 * otherwise the entry is sent to the machine translator and the answer is
 * appended with a leading "+". Empty entries and a bare "+" are copied through.
 *
 * @param array  $changed_diff_unchanged diff sentences; entries starting with "+" are treated as added
 * @param object $alignments             must provide getSentenceInOtherLanguage()
 * @param object $translator             must provide getTranslationInOtherLanguage()
 * @param string $source_lng             language code handed to both collaborators
 * @param string $target_lng             not used by this method
 *
 * @return array translated sentences; an empty array when there is nothing to translate
 */
public function changedSourceFileTranslatedIntoTargetLanguage($changed_diff_unchanged, $alignments, $translator, $source_lng, $target_lng)
{
    $segmentor = new Multilingual_Aligner_SentenceSegmentor();

    // Always return an array, even when the input is empty.
    $changedSource_translated = array();

    // Remove deletion markers using the real array key rather than a running
    // counter, so input that is not indexed 0..n-1 is handled correctly too.
    foreach ($changed_diff_unchanged as $key => $value) {
        if ($value == "*deleted*") {
            unset($changed_diff_unchanged[$key]);
        }
    }
    $changed_diff_unchanged = array_values($changed_diff_unchanged);

    while (count($changed_diff_unchanged) > 0) {
        $value = $changed_diff_unchanged[0];

        // The list is not modified between the lookups the original performed,
        // so the position is resolved once per pass.
        $index = $this->array_search_function($value, $changed_diff_unchanged);

        $aligned = $alignments->getSentenceInOtherLanguage($value, $source_lng, "", $changed_diff_unchanged, $index);
        $key_value = $aligned[0];
        $target_lng_sentence = $aligned[1];

        if (strcmp($target_lng_sentence, "NULL") != 0) {
            // An aligned translation exists: drop every source sentence covered by
            // the aligned key, stepping over interleaved added ("+") sentences.
            $count_ss = count($segmentor->segment(trim($key_value)));
            $jj = 0;
            $removed = 0;
            for ($ii = $index; $ii < $count_ss + $index + $jj; $ii++) {
                if (!isset($changed_diff_unchanged[$ii])) {
                    // Ran past the end of the list; nothing left to remove.
                    break;
                }
                if ($changed_diff_unchanged[$ii] == "" || $changed_diff_unchanged[$ii][0] != "+") {
                    unset($changed_diff_unchanged[$ii]);
                    $removed++;
                } else {
                    $jj++;
                }
            }
            if ($removed == 0) {
                // Guarantee forward progress: without this the same head entry
                // would be processed again on every pass and the loop would never end.
                unset($changed_diff_unchanged[0]);
            }

            foreach ($segmentor->segment(trim($target_lng_sentence)) as $item) {
                $changedSource_translated[] = trim($item);
            }
        } else {
            // Machine translation is required.
            if ($value != "" && $value != "+") {
                // Strip the "added" marker before translating.
                $text = ($value[0] == "+") ? substr($value, 1) : $value;
                $translation = $translator->getTranslationInOtherLanguage($text, $source_lng);
                if ($translation != "NULL") {
                    $changedSource_translated[] = "+" . trim($translation);
                } else {
                    // No translation available: keep the source text.
                    $changedSource_translated[] = "+" . $text;
                }
            } else {
                $changedSource_translated[] = $value;
            }
            unset($changed_diff_unchanged[$index]);
        }

        $changed_diff_unchanged = array_values($changed_diff_unchanged);
    }

    return $changedSource_translated;
}
/**
 * Runs the sentence segmentor on $text and asserts that the produced
 * sentences equal $expSentences.
 *
 * On failure the assertion message contains $message followed by the
 * expected and the obtained sentences, each joined with ", ".
 *
 * @param string $text         text to segment
 * @param array  $expSentences sentences the segmentor is expected to return
 * @param string $message      caller-supplied prefix for the failure message
 */
public function do_test_basic_segmentation($text, $expSentences, $message)
{
    $splitter = new Multilingual_Aligner_SentenceSegmentor();
    $actual = $splitter->segment($text);

    $actualJoined = implode(', ', $actual);
    $expectedJoined = implode(', ', $expSentences);

    $failureText = $message . "\n"
        . "Segmented sentences differed from expected.\n"
        . "Expected Sentences: {$expectedJoined}\n"
        . "Got Sentences: {$actualJoined}\n";

    $this->assertEquals($expSentences, $actual, $failureText);
}
/**
 * Looks up the aligned counterpart of a source sentence in the alignment table.
 *
 * The table's keys are segmented when $source_lng equals $this->l1; for any
 * other language its values are segmented instead (same fallback as before).
 *
 * Pass 1 looks for a single table entry whose first sentence equals
 * $source_lng_sentence and whose remaining sentences match the entries that
 * follow position $index in $sentence_array, skipping entries that start
 * with "+".
 *
 * Pass 2 handles sentences that were segmented differently from the table: it
 * consumes table text and sentence text prefix by prefix and, when both end on
 * the same boundary, returns the concatenation of all table entries consumed
 * in that run.
 *
 * Changes from the previous implementation:
 *  - the run is accumulated explicitly instead of being rebuilt with
 *    prev()/next()/key()/current() on the array being iterated, which relied
 *    on foreach moving the internal array pointer (it no longer does as of PHP 7);
 *  - in the value-side branch the target string was extended with current()
 *    where key() was intended;
 *  - undefined variables ($i, $dummy, possibly $k) are no longer read;
 *  - the debugging echo statements, which wrote unescaped sentence text into
 *    the response, are removed.
 *
 * NOTE(review): the string "NULL" is used as an in-band sentinel, so a sentence
 * whose text is literally "NULL" cannot be told apart from "nothing pending".
 *
 * @param string $source_lng_sentence sentence to look up
 * @param string $source_lng          language of that sentence
 * @param string $key_value           prefix prepended to the source text returned by pass 2
 * @param array  $sentence_array      list of sentences the lookup sentence comes from
 * @param int    $index               position of the lookup sentence inside $sentence_array
 *
 * @return array array(source text, target text) from pass 1,
 *               array(source text, target text, null) from pass 2,
 *               or array("", "NULL") when nothing matches
 */
public function getSentenceInOtherLanguage($source_lng_sentence, $source_lng, $key_value, $sentence_array, $index)
{
    $segmentor = new Multilingual_Aligner_SentenceSegmentor();

    $source_is_key = ($source_lng == $this->l1);
    $sentence_count = count($sentence_array);
    $wanted = trim($source_lng_sentence);

    // Pass 1: exact match against the sentences of one table entry.
    foreach ($this->alignment_table as $key => $val) {
        if ($source_is_key) {
            $source_text = $key;
            $target_text = $val;
        } else {
            $source_text = $val;
            $target_text = $key;
        }

        $sentences = $segmentor->segment(trim($source_text));
        $first = isset($sentences[0]) ? trim($sentences[0]) : "";
        if (strcmp($first, $wanted) != 0) {
            continue;
        }

        $found = true;
        $total = count($sentences);
        // $j walks the entry's sentences, $l walks $sentence_array after $index.
        for ($j = 1, $l = 1; $j < $total; $l++) {
            if ($l + $index >= $sentence_count) {
                $found = false;
                break;
            }
            $candidate = $sentence_array[$index + $l];
            if (strcmp(trim($candidate), trim($sentences[$j])) == 0) {
                $j++;
                continue;
            }
            if ($candidate == "" || $candidate[0] != "+") {
                // A different sentence that is not an added one: no match.
                $found = false;
                break;
            }
            // Added sentence: skip it without consuming an entry sentence.
        }

        if ($found) {
            return array($source_text, $target_text);
        }
    }

    // Pass 2: prefix-wise matching that may span several table entries.
    $temp1 = "NULL";      // pending table text
    $temp2 = "NULL";      // pending text from $sentence_array
    $index1 = $index;     // read position inside $sentence_array
    $run_source = "";     // source side of the entries consumed in the current run
    $run_target = "";     // target side of the entries consumed in the current run

    foreach ($this->alignment_table as $key => $val) {
        if ($source_is_key) {
            $source_text = $key;
            $target_text = $val;
        } else {
            $source_text = $val;
            $target_text = $key;
        }
        $run_source .= $source_text;
        $run_target .= $target_text;

        $sentences = array_map('trim', $segmentor->segment(trim($source_text)));
        $total = count($sentences);
        $sent_ind = 0;

        while (true) {
            $found = 0;
            if ($temp1 == "NULL" && $sent_ind < $total) {
                $temp1 = $sentences[$sent_ind];
                $sent_ind++;
            }
            if ($temp2 == "NULL") {
                $temp2 = $source_lng_sentence;
            }
            $temp1 = trim($temp1);
            $temp2 = trim($temp2);

            // strpos_function() presumably returns the offset of the second
            // argument inside the first, or -1 -- TODO confirm.
            if (($c = $this->strpos_function($temp1, $temp2)) != -1 && $c == 0) {
                // The pending sentence text is a prefix of the pending table text.
                $found = 1;
                if (strlen($temp1) == strlen($temp2) && $sent_ind == $total) {
                    return array($key_value . $run_source, $run_target, null);
                }
                $temp1 = substr($temp1, strlen($temp2));
                if ($temp1 == "") {
                    $temp1 = "NULL";
                }
                // Advance to the next sentence that is not an added ("+") one.
                while ($index1 + 1 < $sentence_count) {
                    $next = $sentence_array[$index1 + 1];
                    $index1++;
                    if ($next == "" || $next[0] != "+") {
                        $temp2 = $next;
                        break;
                    }
                }
                continue;
            } elseif (($c = $this->strpos_function($temp2, $temp1)) != -1 && $c == 0) {
                // The pending table text is a prefix of the pending sentence text.
                $found = 1;
                if (strlen($temp1) == strlen($temp2) && $sent_ind == $total) {
                    return array($key_value . $run_source, $run_target, null);
                }
                $temp2 = substr($temp2, strlen($temp1));
                if ($sent_ind >= $total) {
                    // Entry exhausted: carry the remainder over to the next entry.
                    $temp1 = "NULL";
                    break;
                }
                $temp1 = $sentences[$sent_ind];
                $sent_ind++;
            }

            if ($found == 0) {
                break;
            }
        }

        if ($found == 0) {
            // Mismatch: forget the partial run and start over with the next entry.
            $temp1 = "NULL";
            $temp2 = "NULL";
            $index1 = $index;
            $run_source = "";
            $run_target = "";
        }
    }

    return array("", "NULL");
}
/**
 * Splits $text into chunks made of whole sentences, each at most 450
 * characters long once URL-encoded.
 *
 * Sentences are concatenated in order (no separator is inserted) for as long
 * as the URL-encoded result stays within the limit. The previous version
 * tested the chunk length before appending, so a chunk just under the limit
 * could still grow well past it. A single sentence that exceeds the limit on
 * its own is still returned as one oversized chunk, because sentences are
 * never split.
 *
 * @param string $text text to split
 *
 * @return array list of chunks; empty when the segmentor yields no sentences
 */
function splitInLogicalChunksOf450CharsMax($text)
{
    $segmentor = new Multilingual_Aligner_SentenceSegmentor();
    $sentences = $segmentor->segment($text);

    // Nothing to chunk: avoid reading a non-existent first sentence.
    if (empty($sentences)) {
        return array();
    }

    $chunks = array();
    $chunk = null;
    foreach ($sentences as $sentence) {
        if ($chunk === null) {
            $chunk = $sentence;
            continue;
        }
        if (strlen(urlencode($chunk . $sentence)) <= 450) {
            $chunk .= $sentence;
        } else {
            $chunks[] = $chunk;
            $chunk = $sentence;
        }
    }
    $chunks[] = $chunk;

    return $chunks;
}
/**
 * Builds the updated target-language sentence list from the out-of-date and
 * modified versions of a source page and its translation.
 *
 * All four texts are segmented and trimmed, a sentinel sentence 'dummy' is
 * appended to the modified target before diffing (and its '+dummy' diff line
 * removed again afterwards), both diffs are computed, repeated sentences in
 * each diff are numbered, and the two diffs are merged by
 * Multilingual_Aligner_UpdateSentences1::FinalUpdatedFileinTagetLanguage().
 *
 * NOTE(review): $source_lng and $target_lng are accepted but not used; the
 * diff calls pass the literals 'en' and 'fr' -- confirm whether the
 * parameters should be forwarded instead.
 *
 * @param string $source_outofdate source text the translation was based on
 * @param string $source_modified  current source text
 * @param string $target_outofdate translation of the out-of-date source
 * @param string $target_modified  current translation
 * @param string $source_lng       unused, see note above
 * @param string $target_lng       unused, see note above
 *
 * @return mixed whatever FinalUpdatedFileinTagetLanguage() returns
 */
public function UpdatingTargetPage($source_outofdate, $source_modified, $target_outofdate, $target_modified, $source_lng, $target_lng)
{
    $segmentor = new Multilingual_Aligner_SentenceSegmentor();

    $source_outofdate_sentences = array_map('trim', $segmentor->segment($source_outofdate));
    $source_modified_sentences = array_map('trim', $segmentor->segment($source_modified));
    $target_outofdate_sentences = array_map('trim', $segmentor->segment($target_outofdate));

    $target_modified_sentences = $segmentor->segment($target_modified);
    $target_modified_sentences[] = 'dummy';
    $target_modified_sentences = array_map('trim', $target_modified_sentences);

    $update = new Multilingual_Aligner_UpdateSentences1();
    $source_diff = $update->DifferencebetweenOriginalFileandModifiedFile($source_outofdate_sentences, $source_modified_sentences, $this->alignments, $this->translator, 'en', 'fr', 1);
    $target_diff = $update->DifferencebetweenOriginalFileandModifiedFile($target_outofdate_sentences, $target_modified_sentences, $this->alignments, $this->translator, 'en', 'fr', 0);

    $source_diff = array_map('trim', $source_diff);

    // Trim the target diff and drop the line produced by the sentinel sentence.
    $cleaned_target_diff = array();
    foreach ($target_diff as $line) {
        $line = trim($line);
        if ($line !== '+dummy') {
            $cleaned_target_diff[] = $line;
        }
    }
    $target_diff = $cleaned_target_diff;

    $source_diff = $this->numberRepeatedDiffSentences($source_diff);
    $target_diff = $this->numberRepeatedDiffSentences($target_diff);

    return $update->FinalUpdatedFileinTagetLanguage($source_diff, $target_diff);
}

/**
 * Prefixes every diff line that is not an added ("+") line with "<n>", where
 * n is the 1-based occurrence number of that sentence within the diff.
 *
 * The previous inline code built the prefix with the single-quoted string
 * '<$i>', which PHP does not interpolate, so later occurrences received the
 * literal text "<$i>" instead of "<2>", "<3>", ... and were then tagged a
 * second time because they did not look numbered.
 *
 * @param array $diff list of trimmed diff lines indexed 0..n-1
 *
 * @return array the same list with occurrence markers added
 */
private function numberRepeatedDiffSentences($diff)
{
    for ($a = 0, $total = count($diff); $a < $total; $a++) {
        $sentence = $diff[$a];

        if ($sentence != '' && $sentence[0] == '+') {
            // Added lines are left untouched.
            continue;
        }
        if ($sentence != '' && preg_match('/^<[0-9]+>/', $sentence)) {
            // Already numbered as a later occurrence of an earlier sentence.
            continue;
        }

        $occurrence = 2;
        $from = array_search($sentence, $diff) + 1;
        while (($c = $this->array_search_function($sentence, $diff, $from)) != -1) {
            $diff[$c] = '<' . $occurrence . '>' . $diff[$c];
            $occurrence++;
            $from = $c + 1;
        }
        $diff[$a] = '<1>' . $sentence;
    }

    return $diff;
}
/**
 * Looks up the aligned counterpart of a source sentence in the alignment table.
 *
 * The table's keys are segmented when $source_lng equals $this->l1; for any
 * other language its values are segmented instead (same fallback as before).
 *
 * Pass 1 looks for a single table entry whose first sentence equals
 * $source_lng_sentence and whose remaining sentences match the entries that
 * follow position $index in $sentence_array, skipping entries that start
 * with "+".
 *
 * Pass 2 handles sentences that were segmented differently from the table: it
 * consumes table text and sentence text prefix by prefix and, when both end on
 * the same boundary, returns the concatenation of all table entries consumed
 * in that run.
 *
 * Changes from the previous implementation:
 *  - the run is accumulated explicitly instead of being rebuilt with
 *    prev()/next()/key()/current() on the array being iterated, which relied
 *    on foreach moving the internal array pointer (it no longer does as of PHP 7);
 *  - in the value-side branch the target string was extended with current()
 *    where key() was intended;
 *  - undefined variables ($i, $dummy, possibly $k) are no longer read.
 *
 * NOTE(review): the string "NULL" is used as an in-band sentinel, so a sentence
 * whose text is literally "NULL" cannot be told apart from "nothing pending".
 *
 * @param string $source_lng_sentence sentence to look up
 * @param string $source_lng          language of that sentence
 * @param string $key_value           prefix prepended to the source text returned by pass 2
 * @param array  $sentence_array      list of sentences the lookup sentence comes from
 * @param int    $index               position of the lookup sentence inside $sentence_array
 *
 * @return array array(source text, target text) from pass 1,
 *               array(source text, target text, null) from pass 2,
 *               or array("", "NULL") when nothing matches
 */
public function getSentenceInOtherLanguage($source_lng_sentence, $source_lng, $key_value, $sentence_array, $index)
{
    $segmentor = new Multilingual_Aligner_SentenceSegmentor();

    $source_is_key = ($source_lng == $this->l1);
    $sentence_count = count($sentence_array);
    $wanted = trim($source_lng_sentence);

    // Pass 1: exact match against the sentences of one table entry.
    foreach ($this->alignment_table as $key => $val) {
        if ($source_is_key) {
            $source_text = $key;
            $target_text = $val;
        } else {
            $source_text = $val;
            $target_text = $key;
        }

        $sentences = $segmentor->segment(trim($source_text));
        $first = isset($sentences[0]) ? trim($sentences[0]) : "";
        if (strcmp($first, $wanted) != 0) {
            continue;
        }

        $found = true;
        $total = count($sentences);
        // $j walks the entry's sentences, $l walks $sentence_array after $index.
        for ($j = 1, $l = 1; $j < $total; $l++) {
            if ($l + $index >= $sentence_count) {
                $found = false;
                break;
            }
            $candidate = $sentence_array[$index + $l];
            if (strcmp(trim($candidate), trim($sentences[$j])) == 0) {
                $j++;
                continue;
            }
            if ($candidate == "" || $candidate[0] != "+") {
                // A different sentence that is not an added one: no match.
                $found = false;
                break;
            }
            // Added sentence: skip it without consuming an entry sentence.
        }

        if ($found) {
            return array($source_text, $target_text);
        }
    }

    // Pass 2: prefix-wise matching that may span several table entries.
    $temp1 = "NULL";      // pending table text
    $temp2 = "NULL";      // pending text from $sentence_array
    $index1 = $index;     // read position inside $sentence_array
    $run_source = "";     // source side of the entries consumed in the current run
    $run_target = "";     // target side of the entries consumed in the current run

    foreach ($this->alignment_table as $key => $val) {
        if ($source_is_key) {
            $source_text = $key;
            $target_text = $val;
        } else {
            $source_text = $val;
            $target_text = $key;
        }
        $run_source .= $source_text;
        $run_target .= $target_text;

        $sentences = array_map('trim', $segmentor->segment(trim($source_text)));
        $total = count($sentences);
        $sent_ind = 0;

        while (true) {
            $found = 0;
            if ($temp1 == "NULL" && $sent_ind < $total) {
                $temp1 = $sentences[$sent_ind];
                $sent_ind++;
            }
            if ($temp2 == "NULL") {
                $temp2 = $source_lng_sentence;
            }
            $temp1 = trim($temp1);
            $temp2 = trim($temp2);

            // strpos_function() presumably returns the offset of the second
            // argument inside the first, or -1 -- TODO confirm.
            if (($c = $this->strpos_function($temp1, $temp2)) != -1 && $c == 0) {
                // The pending sentence text is a prefix of the pending table text.
                $found = 1;
                if (strlen($temp1) == strlen($temp2) && $sent_ind == $total) {
                    return array($key_value . $run_source, $run_target, null);
                }
                $temp1 = substr($temp1, strlen($temp2));
                if ($temp1 == "") {
                    $temp1 = "NULL";
                }
                // Advance to the next sentence that is not an added ("+") one.
                while ($index1 + 1 < $sentence_count) {
                    $next = $sentence_array[$index1 + 1];
                    $index1++;
                    if ($next == "" || $next[0] != "+") {
                        $temp2 = $next;
                        break;
                    }
                }
                continue;
            } elseif (($c = $this->strpos_function($temp2, $temp1)) != -1 && $c == 0) {
                // The pending table text is a prefix of the pending sentence text.
                $found = 1;
                if (strlen($temp1) == strlen($temp2) && $sent_ind == $total) {
                    return array($key_value . $run_source, $run_target, null);
                }
                $temp2 = substr($temp2, strlen($temp1));
                if ($sent_ind >= $total) {
                    // Entry exhausted: carry the remainder over to the next entry.
                    $temp1 = "NULL";
                    break;
                }
                $temp1 = $sentences[$sent_ind];
                $sent_ind++;
            }

            if ($found == 0) {
                break;
            }
        }

        if ($found == 0) {
            // Mismatch: forget the partial run and start over with the next entry.
            $temp1 = "NULL";
            $temp2 = "NULL";
            $index1 = $index;
            $run_source = "";
            $run_target = "";
        }
    }

    return array("", "NULL");
}
/**
 * Splits $text into sentences by delegating to a fresh
 * Multilingual_Aligner_SentenceSegmentor instance.
 *
 * @param string $text text to segment
 *
 * @return mixed the value returned by the segmentor's segment() method
 */
public function _segment_into_sentences($text)
{
    $splitter = new Multilingual_Aligner_SentenceSegmentor();

    return $splitter->segment($text);
}