Exemplo n.º 1
0
 /**
  * Builds the target-language counterpart of a source diff.
  *
  * Entries equal to "*deleted*" are discarded first. The remaining entries are
  * consumed from the front of the list:
  *  - when $alignments returns an aligned text for the head entry, the source
  *    entries covered by that alignment are removed (entries starting with "+"
  *    are left in place) and the aligned text is appended sentence by sentence;
  *  - otherwise the head entry is handed to $translator and the result is
  *    appended with a leading "+". If no translation is available the original
  *    text is appended instead, also with a leading "+".
  *
  * NOTE(review): $target_lng is never used and the translator is called with
  * $source_lng - confirm this matches the translator's contract.
  *
  * @param array  $changed_diff_unchanged Diff entries of the source text (strings).
  * @param object $alignments             Provides getSentenceInOtherLanguage(); expected to return
  *                                       array(matched source text, aligned text), with "NULL" as
  *                                       the aligned text when nothing matches.
  * @param object $translator             Provides getTranslationInOtherLanguage(); "NULL" means
  *                                       no translation is available.
  * @param string $source_lng             Language code passed to both collaborators.
  * @param string $target_lng             Unused.
  *
  * @return array Target-language entries; empty when there is nothing to translate.
  */
 public function changedSourceFileTranslatedIntoTargetLanguage($changed_diff_unchanged, $alignments, $translator, $source_lng, $target_lng)
 {
     $segmentor = new Multilingual_Aligner_SentenceSegmentor();
     // Initialised up front so an empty (or fully deleted) diff returns an
     // empty array instead of an undefined variable.
     $changedSource_translated = array();

     // Drop deleted entries by their real key rather than by a running counter,
     // then reindex so the head of the list is always at offset 0.
     foreach ($changed_diff_unchanged as $position => $entry) {
         if ($entry == "*deleted*") {
             unset($changed_diff_unchanged[$position]);
         }
     }
     $changed_diff_unchanged = array_values($changed_diff_unchanged);

     while (count($changed_diff_unchanged) > 0) {
         $value = $changed_diff_unchanged[0];
         // The list is not modified between the lookup and its uses below,
         // so one search per iteration is enough.
         $index = $this->array_search_function($value, $changed_diff_unchanged);
         $target_lng_array = $alignments->getSentenceInOtherLanguage($value, $source_lng, "", $changed_diff_unchanged, $index);
         //as two or more target sentences are being considered as one string, here instead of string arrays should be returned
         $key_value = $target_lng_array[0];
         $target_lng_sentence = $target_lng_array[1];

         if (strcmp($target_lng_sentence, "NULL") != 0) {
             // An alignment exists: remove the source entries it covers.
             $source_sent = $segmentor->segment(trim($key_value));
             $remaining = count($changed_diff_unchanged);
             // $jj counts the "+" entries that were skipped, so the window
             // still spans count($source_sent) removable entries.
             $jj = 0;
             for ($ii = $index, $count_ss = count($source_sent); $ii < $count_ss + $index + $jj; $ii++) {
                 if (!isset($changed_diff_unchanged[$ii])) {
                     // Ran past the end of the list; nothing left to remove.
                     break;
                 }
                 if ($changed_diff_unchanged[$ii] == "" || $changed_diff_unchanged[$ii][0] != "+") {
                     unset($changed_diff_unchanged[$ii]);
                 } else {
                     $jj++;
                 }
             }
             if (count($changed_diff_unchanged) == $remaining) {
                 // Nothing was consumed (e.g. the matched text segmented to no
                 // sentences). Drop the head entry so the loop always terminates.
                 unset($changed_diff_unchanged[0]);
             }
             foreach ($segmentor->segment(trim($target_lng_sentence)) as $item) {
                 $changedSource_translated[] = trim($item);
             }
         } else {
             //Machine Translation is required
             if ($value != "" && $value != "+") {
                 // Strip the leading "+" marker before translating.
                 $text = $value[0] == "+" ? substr($value, 1) : $value;
                 $translation = $translator->getTranslationInOtherLanguage($text, $source_lng);
                 if ($translation != "NULL") {
                     $changedSource_translated[] = "+" . trim($translation);
                 } else {
                     // No translation available: fall back to the untranslated text.
                     $changedSource_translated[] = "+" . $text;
                 }
             } else {
                 // Empty entries and bare "+" markers are passed through unchanged.
                 $changedSource_translated[] = $value;
             }
             unset($changed_diff_unchanged[$index]);
         }
         $changed_diff_unchanged = array_values($changed_diff_unchanged);
     }

     return $changedSource_translated;
 }
Exemplo n.º 2
0
 /**
  * Segments $text and asserts that the result equals $expSentences.
  *
  * On failure the assertion message lists both sentence lists, joined with ", ".
  *
  * @param string $text         Text handed to the sentence segmentor.
  * @param array  $expSentences Sentences the segmentor is expected to return.
  * @param string $message      Caller-supplied prefix for the failure message.
  */
 public function do_test_basic_segmentation($text, $expSentences, $message)
 {
     $sentenceSplitter = new Multilingual_Aligner_SentenceSegmentor();
     $actualSentences = $sentenceSplitter->segment($text);

     $failureReport = sprintf(
         "%s\nSegmented sentences differed from expected.\nExpected Sentences: %s\nGot      Sentences: %s\n",
         $message,
         implode(', ', $expSentences),
         implode(', ', $actualSentences)
     );

     $this->assertEquals($expSentences, $actualSentences, $failureReport);
 }
Exemplo n.º 3
0
 /**
  * Looks up the aligned counterpart of a sentence in $this->alignment_table.
  *
  * The table maps text in language $this->l1 (keys) to text in language
  * $this->l2 (values). The lookup runs in two passes:
  *
  *  1. Whole-entry match: an entry qualifies when the first segmented sentence
  *     of its source side equals $source_lng_sentence and every further
  *     sentence matches the entries following $index in $sentence_array
  *     (entries starting with "+" are skipped over). Returns
  *     array(source text, aligned text).
  *  2. Prefix match: the source sentence and the table sentences are compared
  *     piecewise, each consuming the other's prefix, possibly across several
  *     table entries. Returns array(source text, aligned text, null), with both
  *     texts concatenated onto $key_value / an empty string.
  *
  * When nothing matches, array("", "NULL") is returned.
  *
  * This variant echoes HTML debug output while it runs.
  *
  * NOTE(review): pass 2 walks the table with prev()/next()/key()/current(),
  * which depends on the array's internal pointer. foreach does not move that
  * pointer on PHP 7 and later - verify the multi-entry result on the PHP
  * version in use.
  *
  * @param string $source_lng_sentence Sentence to look up.
  * @param string $source_lng          Language of the sentence; anything other than $this->l1
  *                                    is handled as $this->l2 (as the original did implicitly).
  * @param string $key_value           Prefix for the concatenated source text of pass 2.
  * @param array  $sentence_array      Sentence list the source sentence was taken from.
  * @param int    $index               Offset of the source sentence in $sentence_array.
  *
  * @return array See above.
  */
 public function getSentenceInOtherLanguage($source_lng_sentence, $source_lng, $key_value, $sentence_array, $index)
 {
     echo "in getSentenceInOtherLanguage<br/>";
     $segmentor = new Multilingual_Aligner_SentenceSegmentor();
     // 1: the table keys are in the source language; 0: the table values are.
     // Previously $k stayed undefined for an unknown language, which behaved like 0.
     $k = $source_lng == $this->l1 ? 1 : 0;

     // Pass 1: whole-entry match.
     foreach ($this->alignment_table as $key => $val) {
         if ($k == 1) {
             echo "key##{$key}<br/>";
             $sentences = $segmentor->segment(trim($key));
             echo "count " . count($sentences) . "<br/>";
             foreach ($sentences as $t) {
                 echo "line after segmenting ##{$t}<br/>";
             }
             if (strcmp(trim($sentences[0]), trim($source_lng_sentence)) == 0) {
                 $found = 1;
                 // $j walks the table sentences, $l the entries after $index;
                 // $j only advances when the current pair matched.
                 for ($j = 1, $l = 1, $countSentences = count($sentences); $j < $countSentences; $l++) {
                     $flag = 0;
                     if ($l + $index >= count($sentence_array)) {
                         $found = 0;
                         break;
                     }
                     if (strcmp(trim($sentence_array[$index + $l]), trim($sentences[$j])) != 0) {
                         if ($sentence_array[$index + $l] == "" || $sentence_array[$index + $l][0] != "+") {
                             $found = 0;
                             break;
                         } else {
                             // A "+" entry sits between the matched sentences: skip it.
                             $flag = 1;
                         }
                     }
                     if ($flag == 0) {
                         $j++;
                     }
                 }
                 if ($found == 1) {
                     return array($key, $val);
                 }
             }
         } else {
             $sentences = $segmentor->segment(trim($val));
             if (strcmp(trim($sentences[0]), trim($source_lng_sentence)) == 0) {
                 $found = 1;
                 // Starts at 1 like the branch above; the original read an
                 // undefined $i here, which evaluated to the same value.
                 for ($j = 1, $l = 1, $countSentences = count($sentences); $j < $countSentences; $l++) {
                     $flag = 0;
                     if ($l + $index >= count($sentence_array)) {
                         $found = 0;
                         break;
                     }
                     if (strcmp(trim($sentence_array[$index + $l]), trim($sentences[$j])) != 0) {
                         if ($sentence_array[$index + $l] == "" || $sentence_array[$index + $l][0] != "+") {
                             $found = 0;
                             break;
                         } else {
                             // A "+" entry sits between the matched sentences: skip it.
                             $flag = 1;
                         }
                     }
                     if ($flag == 0) {
                         $j++;
                     }
                 }
                 if ($found == 1) {
                     return array($val, $key);
                 }
             }
         }
     }

     // Pass 2: prefix match. $temp1 holds the unmatched rest of the current
     // table sentence, $temp2 the unmatched rest of the current source
     // sentence; "NULL" marks "needs refilling". $start counts the table
     // entries that belong to the match in progress.
     if ($k == 1) {
         $temp1 = "NULL";
         $temp2 = "NULL";
         $index1 = $index;
         $start = 0;
         $value = "";
         $found = 0;
         foreach ($this->alignment_table as $key => $val) {
             $start++;
             $sent_ind = 0;
             $sentences = $segmentor->segment(trim($key));
             for ($j = 0, $countSentences = count($sentences); $j < $countSentences; $j++) {
                 $sentences[$j] = trim($sentences[$j]);
             }
             echo "another sentence<br/>";
             while (1) {
                 $found = 0;
                 //if source line is a part of translation
                 if ($temp1 == "NULL" && $sent_ind < count($sentences)) {
                     $temp1 = $sentences[$sent_ind];
                     $sent_ind++;
                 }
                 if ($temp2 == "NULL") {
                     $temp2 = $source_lng_sentence;
                 }
                 $temp1 = trim($temp1);
                 $temp2 = trim($temp2);
                 if (($c = $this->strpos_function($temp1, $temp2)) != -1 && $c == 0) {
                     // The source sentence is a prefix of the table sentence.
                     $found = 1;
                     echo "inside strpos_function({$temp1},{$temp2})<br/>";
                     if (strlen($temp1) == strlen($temp2) && $sent_ind == count($sentences)) {
                         // Both sides are fully consumed: collect the $start entries of the match.
                         echo "inside strlen({$temp1})==strlen({$temp2}) and ####start= {$start}<br/>";
                         for ($u = 0; $u < $start; $u++) {
                             prev($this->alignment_table);
                         }
                         $d = key($this->alignment_table);
                         $key_value = $key_value . $d;
                         $value = $value . current($this->alignment_table);
                         for ($u = 0; $u < $start - 1; $u++) {
                             echo "outside<br/>";
                             next($this->alignment_table);
                             $d = key($this->alignment_table);
                             $key_value = $key_value . $d;
                             $value = $value . current($this->alignment_table);
                         }
                         // Third element kept for shape compatibility; it was an undefined variable.
                         return array($key_value, $value, null);
                     }
                     $temp1 = substr($temp1, strlen($temp2));
                     if ($temp1 == "") {
                         $temp1 = "NULL";
                     }
                     // Advance to the next source entry that does not start with "+".
                     while ($index1 + 1 < count($sentence_array)) {
                         if ($sentence_array[$index1 + 1] == "" || $sentence_array[$index1 + 1][0] != "+") {
                             $temp2 = $sentence_array[$index1 + 1];
                             $index1++;
                             break;
                         }
                         $index1++;
                     }
                     continue;
                 } else {
                     if (($c = $this->strpos_function($temp2, $temp1)) != -1 && $c == 0) {
                         // The table sentence is a prefix of the source sentence.
                         $found = 1;
                         if (strlen($temp1) == strlen($temp2) && $sent_ind == count($sentences)) {
                             echo "inside strlen({$temp1})==strlen({$temp2})  and ####start= {$start}<br/>";
                             for ($u = 0; $u < $start; $u++) {
                                 prev($this->alignment_table);
                             }
                             $d = key($this->alignment_table);
                             $key_value = $key_value . $d;
                             $value = $value . current($this->alignment_table);
                             for ($u = 0; $u < $start - 1; $u++) {
                                 next($this->alignment_table);
                                 $key_value = $key_value . key($this->alignment_table);
                                 $value = $value . current($this->alignment_table);
                             }
                             return array($key_value, $value, null);
                         }
                         $temp2 = substr($temp2, strlen($temp1));
                         if ($sent_ind >= count($sentences)) {
                             // This entry is used up; continue with the next table entry.
                             $temp1 = "NULL";
                             break;
                         } else {
                             $temp1 = $sentences[$sent_ind];
                             $sent_ind++;
                         }
                     }
                 }
                 if ($found == 0) {
                     echo "break<br/>";
                     $start = 0;
                     $value = "";
                     break;
                 }
             }
             if ($found == 0) {
                 // The match in progress failed: restart from the original source sentence.
                 $temp1 = "NULL";
                 $temp2 = "NULL";
                 $index1 = $index;
             }
         }
     } else {
         // Same algorithm with the roles of key and value swapped.
         $temp1 = "NULL";
         $temp2 = "NULL";
         $index1 = $index;
         $start = 0;
         $value = "";
         $found = 0;
         foreach ($this->alignment_table as $key => $val) {
             $start++;
             $sent_ind = 0;
             $sentences = $segmentor->segment(trim($val));
             for ($j = 0, $countSentences = count($sentences); $j < $countSentences; $j++) {
                 $sentences[$j] = trim($sentences[$j]);
             }
             while (1) {
                 $found = 0;
                 if ($temp1 == "NULL" && $sent_ind < count($sentences)) {
                     $temp1 = $sentences[$sent_ind];
                     $sent_ind++;
                 }
                 if ($temp2 == "NULL") {
                     $temp2 = $source_lng_sentence;
                 }
                 $temp1 = trim($temp1);
                 $temp2 = trim($temp2);
                 if (($c = $this->strpos_function($temp1, $temp2)) != -1 && $c == 0) {
                     // The source sentence is a prefix of the table sentence.
                     $found = 1;
                     if (strlen($temp1) == strlen($temp2) && $sent_ind == count($sentences)) {
                         for ($u = 0; $u < $start; $u++) {
                             prev($this->alignment_table);
                         }
                         $d = current($this->alignment_table);
                         $key_value = $key_value . $d;
                         $value = $value . key($this->alignment_table);
                         for ($u = 0; $u < $start - 1; $u++) {
                             next($this->alignment_table);
                             $d = current($this->alignment_table);
                             $key_value = $key_value . $d;
                             $value = $value . key($this->alignment_table);
                         }
                         return array($key_value, $value, null);
                     }
                     $temp1 = substr($temp1, strlen($temp2));
                     if ($temp1 == "") {
                         $temp1 = "NULL";
                     }
                     // Advance to the next source entry that does not start with "+".
                     while ($index1 + 1 < count($sentence_array)) {
                         if ($sentence_array[$index1 + 1] == "" || $sentence_array[$index1 + 1][0] != "+") {
                             $temp2 = $sentence_array[$index1 + 1];
                             $index1++;
                             break;
                         }
                         $index1++;
                     }
                     continue;
                 } else {
                     if (($c = $this->strpos_function($temp2, $temp1)) != -1 && $c == 0) {
                         // The table sentence is a prefix of the source sentence.
                         $found = 1;
                         if (strlen($temp1) == strlen($temp2) && $sent_ind == count($sentences)) {
                             for ($u = 0; $u < $start; $u++) {
                                 prev($this->alignment_table);
                             }
                             $d = current($this->alignment_table);
                             $key_value = $key_value . $d;
                             $value = $value . key($this->alignment_table);
                             for ($u = 0; $u < $start - 1; $u++) {
                                 next($this->alignment_table);
                                 $key_value = $key_value . current($this->alignment_table);
                                 // The aligned side is the table key here, as in the
                                 // branches above; the original appended current() twice.
                                 $value = $value . key($this->alignment_table);
                             }
                             return array($key_value, $value, null);
                         }
                         $temp2 = substr($temp2, strlen($temp1));
                         if ($sent_ind >= count($sentences)) {
                             // This entry is used up; continue with the next table entry.
                             $temp1 = "NULL";
                             break;
                         } else {
                             $temp1 = $sentences[$sent_ind];
                             $sent_ind++;
                         }
                     }
                 }
                 if ($found == 0) {
                     echo "break<br/>";
                     $start = 0;
                     $value = "";
                     break;
                 }
             }
             if ($found == 0) {
                 // The match in progress failed: restart from the original source sentence.
                 $temp1 = "NULL";
                 $temp2 = "NULL";
                 $index1 = $index;
             }
         }
     }

     // No alignment found.
     return array("", "NULL");
 }
 /**
  * Splits $text into chunks of whole sentences whose URL-encoded length does
  * not exceed 450 bytes.
  *
  * Sentences are appended to the current chunk as long as the chunk including
  * the next sentence stays within the limit. The original checked the chunk
  * before appending, so a chunk could grow past the limit. A single sentence
  * that is longer than the limit cannot be split and is returned as a chunk
  * of its own.
  *
  * @param string $text Text to split.
  *
  * @return array List of chunks; empty when the segmentor yields no sentences.
  */
 function splitInLogicalChunksOf450CharsMax($text)
 {
     $maxEncodedLength = 450;
     $segmentor = new Multilingual_Aligner_SentenceSegmentor();
     $sentences = $segmentor->segment($text);
     if (empty($sentences)) {
         // Nothing to chunk; the original read $sentences[0] here and returned array(null).
         return array();
     }

     $chunks = array();
     $chunk = $sentences[0];
     for ($ii = 1, $total = count($sentences); $ii < $total; $ii++) {
         $candidate = $chunk . $sentences[$ii];
         if (strlen(urlencode($candidate)) <= $maxEncodedLength) {
             $chunk = $candidate;
         } else {
             // The next sentence would overflow the chunk: close it and start a new one.
             $chunks[] = $chunk;
             $chunk = $sentences[$ii];
         }
     }
     $chunks[] = $chunk;

     return $chunks;
 }
Exemplo n.º 5
0
 /**
  * Produces the updated target page from the out-of-date and modified versions
  * of the source and target texts.
  *
  * All four texts are segmented into trimmed sentences, each pair is diffed
  * with Multilingual_Aligner_UpdateSentences1, repeated sentences in both
  * diffs are numbered with a "<n>" prefix, and the two diffs are merged by
  * FinalUpdatedFileinTagetLanguage().
  *
  * A 'dummy' sentence is appended to the modified target text before diffing
  * and the resulting '+dummy' entry is removed afterwards (presumably so the
  * target diff always ends with an added entry - confirm against the differ).
  *
  * NOTE(review): $source_lng and $target_lng are not used; the differ is
  * always called with 'en' and 'fr'. Confirm whether the parameters should be
  * passed through instead.
  *
  * @param string $source_outofdate Source text the target was translated from.
  * @param string $source_modified  Current source text.
  * @param string $target_outofdate Target text as originally translated.
  * @param string $target_modified  Current target text.
  * @param string $source_lng       Unused.
  * @param string $target_lng       Unused.
  *
  * @return mixed Whatever FinalUpdatedFileinTagetLanguage() returns for the two diffs.
  */
 public function UpdatingTargetPage($source_outofdate, $source_modified, $target_outofdate, $target_modified, $source_lng, $target_lng)
 {
     $segmentor = new Multilingual_Aligner_SentenceSegmentor();
     $source_outofdate_sentences = array_map('trim', $segmentor->segment($source_outofdate));
     $source_modified_sentences = array_map('trim', $segmentor->segment($source_modified));
     $target_outofdate_sentences = array_map('trim', $segmentor->segment($target_outofdate));
     $target_modified_sentences = array_map('trim', $segmentor->segment($target_modified));
     // Sentinel sentence; its diff entry is removed again below.
     $target_modified_sentences[] = 'dummy';

     $update = new Multilingual_Aligner_UpdateSentences1();
     $source_diff = $update->DifferencebetweenOriginalFileandModifiedFile($source_outofdate_sentences, $source_modified_sentences, $this->alignments, $this->translator, 'en', 'fr', 1);
     $target_diff = $update->DifferencebetweenOriginalFileandModifiedFile($target_outofdate_sentences, $target_modified_sentences, $this->alignments, $this->translator, 'en', 'fr', 0);

     $source_diff = array_map('trim', $source_diff);
     $target_diff = array_map('trim', $target_diff);
     foreach ($target_diff as $position => $entry) {
         if ($entry === '+dummy') {
             unset($target_diff[$position]);
         }
     }
     $target_diff = array_values($target_diff);

     $source_diff = $this->numberRepeatedDiffEntries($source_diff);
     $target_diff = $this->numberRepeatedDiffEntries($target_diff);

     return $update->FinalUpdatedFileinTagetLanguage($source_diff, $target_diff);
 }

 /**
  * Prefixes every entry that does not start with "+" with "<n>", where n is
  * the 1-based occurrence number of that sentence within the diff.
  *
  * Entries starting with "+" and entries that already carry a "<digits>"
  * prefix are left untouched.
  *
  * @param array $diff List of diff entries (strings, keyed 0..n-1).
  *
  * @return array The diff with occurrence prefixes applied.
  */
 private function numberRepeatedDiffEntries(array $diff)
 {
     for ($a = 0, $total = count($diff); $a < $total; $a++) {
         $sentence = $diff[$a];
         if ($sentence != '' && $sentence[0] == '+') {
             continue;
         }
         if (preg_match('/^<[0-9]+>/', $sentence) === 1) {
             // Already numbered, either by an earlier iteration or by the differ.
             continue;
         }
         // Every earlier non-"+" entry has been prefixed already, so the first
         // occurrence of this sentence is $a itself; later ones follow it.
         $occurrence = 2;
         $from = $a + 1;
         while (($c = $this->array_search_function($sentence, $diff, $from)) != -1) {
             // The original used the single-quoted literal '<$i>', which does
             // not interpolate and wrote "<$i>" instead of the number.
             $diff[$c] = '<' . $occurrence . '>' . $diff[$c];
             $occurrence++;
             $from = $c + 1;
         }
         $diff[$a] = '<1>' . $sentence;
     }

     return $diff;
 }
 public function getSentenceInOtherLanguage($source_lng_sentence, $source_lng, $key_value, $sentence_array, $index)
 {
     $segmentor = new Multilingual_Aligner_SentenceSegmentor();
     if ($source_lng == $this->l1) {
         $k = 1;
     } else {
         if ($source_lng == $this->l2) {
             $k = 0;
         }
     }
     foreach ($this->alignment_table as $key => $val) {
         if ($k == 1) {
             $sentences = $segmentor->segment(trim($key));
             if (strcmp(trim($sentences[0]), trim($source_lng_sentence)) == 0) {
                 //if one of those is matched
                 $found = 1;
                 for ($j = 1, $l = 1, $count_sentences = count($sentences); $j < $count_sentences; $l++) {
                     $flag = 0;
                     if ($l + $index >= count($sentence_array)) {
                         $found = 0;
                         break;
                     }
                     if (strcmp(trim($sentence_array[$index + $l]), trim($sentences[$j])) != 0) {
                         if ($sentence_array[$index + $l] == "" || $sentence_array[$index + $l][0] != "+") {
                             //if it is an added sentence
                             $found = 0;
                             break;
                         } else {
                             $flag = 1;
                         }
                     }
                     //if
                     if ($flag == 0) {
                         $j++;
                     }
                 }
                 //for
                 if ($found == 1) {
                     $key_value = $key;
                     $array = array($key, $val);
                     return $array;
                 }
                 //if
             }
             //if
         } else {
             $sentences = $segmentor->segment(trim($val));
             if (strcmp(trim($sentences[0]), trim($source_lng_sentence)) == 0) {
                 //if one of those is matched
                 $found = 1;
                 for ($j = $i + 1, $l = 1, $count_sentences = count($sentences); $j < $count_sentences; $l++) {
                     $flag = 0;
                     if ($l + $index >= count($sentence_array)) {
                         $found = 0;
                         break;
                     }
                     if (strcmp(trim($sentence_array[$index + $l]), trim($sentences[$j])) != 0) {
                         if ($sentence_array[$index + $l] == "" || $sentence_array[$index + $l][0] != "+") {
                             //if it is an added sentence
                             $found = 0;
                             break;
                         } else {
                             $flag = 1;
                         }
                     }
                     //if
                     if ($flag == 0) {
                         $j++;
                     }
                 }
                 //for
                 if ($found == 1) {
                     $key_value = $val;
                     $array = array($val, $key);
                     return $array;
                 }
                 //if
             }
             //if
         }
         //else
     }
     //foreach
     if ($k == 1) {
         $times = 0;
         $i = -1;
         $temp1 = "NULL";
         $temp2 = "NULL";
         $index1 = $index;
         $start = 0;
         $value = "";
         $found = 0;
         foreach ($this->alignment_table as $key => $val) {
             $start++;
             $sent_ind = 0;
             $sentences = $segmentor->segment(trim($key));
             for ($j = 0, $count_sentences = count($sentences); $j < $count_sentences; $j++) {
                 $sentences[$j] = trim($sentences[$j]);
             }
             while (1) {
                 $found = 0;
                 if ($temp1 == "NULL" && $sent_ind < count($sentences)) {
                     $temp1 = $sentences[$sent_ind];
                     $sent_ind++;
                 }
                 if ($temp2 == "NULL") {
                     $temp2 = $source_lng_sentence;
                     $index1;
                 }
                 $temp1 = trim($temp1);
                 $temp2 = trim($temp2);
                 if (($c = $this->strpos_function($temp1, $temp2)) != -1 && $c == 0) {
                     $found = 1;
                     if (strlen($temp1) == strlen($temp2) && $sent_ind == count($sentences)) {
                         for ($u = 0; $u < $start; $u++) {
                             //return key and val
                             prev($this->alignment_table);
                         }
                         $d = key($this->alignment_table);
                         $key_value = $key_value . $d;
                         $value = $value . current($this->alignment_table);
                         for ($u = 0; $u < $start - 1; $u++) {
                             //return key and val
                             next($this->alignment_table);
                             $d = key($this->alignment_table);
                             $key_value = $key_value . $d;
                             $value = $value . current($this->alignment_table);
                         }
                         $array = array($key_value, $value, $dummy);
                         $start = 0;
                         return $array;
                     }
                     $temp1 = substr($temp1, strlen($temp2));
                     if ($temp1 == "") {
                         $temp1 = "NULL";
                     }
                     while ($index1 + 1 < count($sentence_array)) {
                         if ($sentence_array[$index1 + 1] == "" || $sentence_array[$index1 + 1][0] != "+") {
                             $temp2 = $sentence_array[$index1 + 1];
                             $index1++;
                             break;
                         }
                         //if
                         $index1++;
                     }
                     //while
                     continue;
                 } else {
                     if (($c = $this->strpos_function($temp2, $temp1)) != -1 && $c == 0) {
                         $found = 1;
                         if (strlen($temp1) == strlen($temp2) && $sent_ind == count($sentences)) {
                             for ($u = 0; $u < $start; $u++) {
                                 //return key and val
                                 prev($this->alignment_table);
                             }
                             $d = key($this->alignment_table);
                             $key_value = $key_value . $d;
                             $value = $value . current($this->alignment_table);
                             for ($u = 0; $u < $start - 1; $u++) {
                                 //return key and val
                                 next($this->alignment_table);
                                 $key_value = $key_value . key($this->alignment_table);
                                 $value = $value . current($this->alignment_table);
                             }
                             $array = array($key_value, $value, $dummy);
                             $start = 0;
                             return $array;
                         }
                         $temp2 = substr($temp2, strlen($temp1));
                         if ($sent_ind >= count($sentences)) {
                             $temp1 = "NULL";
                             break;
                         } else {
                             $temp1 = $sentences[$sent_ind];
                             $sent_ind++;
                         }
                     }
                 }
                 //if strpos_function($sentence[0],$source_lng_sentence)
                 if ($found == 0) {
                     $start = 0;
                     $value = "";
                     break;
                 }
                 //if
             }
             //while
             if ($found == 0) {
                 $temp1 = "NULL";
                 $temp2 = "NULL";
                 $index1 = $index;
             }
         }
         //foreach
     } else {
         //if $k
         $times = 0;
         $i = -1;
         $temp1 = "NULL";
         $temp2 = "NULL";
         $index1 = $index;
         $start = 0;
         $value = "";
         $found = 0;
         foreach ($this->alignment_table as $key => $val) {
             $start++;
             $sent_ind = 0;
             $sentences = $segmentor->segment(trim($val));
             for ($j = 0, $count_sentences = count($sentences); $j < $count_sentences; $j++) {
                 $sentences[$j] = trim($sentences[$j]);
             }
             while (1) {
                 $found = 0;
                 if ($temp1 == "NULL" && $sent_ind < count($sentences)) {
                     $temp1 = $sentences[$sent_ind];
                     $sent_ind++;
                 }
                 if ($temp2 == "NULL") {
                     $temp2 = $source_lng_sentence;
                     $index1;
                 }
                 $temp1 = trim($temp1);
                 $temp2 = trim($temp2);
                 if (($c = $this->strpos_function($temp1, $temp2)) != -1 && $c == 0) {
                     $found = 1;
                     if (strlen($temp1) == strlen($temp2) && $sent_ind == count($sentences)) {
                         for ($u = 0; $u < $start; $u++) {
                             //return key and val
                             prev($this->alignment_table);
                         }
                         $d = current($this->alignment_table);
                         $key_value = $key_value . $d;
                         $value = $value . key($this->alignment_table);
                         for ($u = 0; $u < $start - 1; $u++) {
                             //return key and val
                             next($this->alignment_table);
                             $d = current($this->alignment_table);
                             $key_value = $key_value . $d;
                             $value = $value . key($this->alignment_table);
                         }
                         $array = array($key_value, $value, $dummy);
                         $start = 0;
                         return $array;
                     }
                     $temp1 = substr($temp1, strlen($temp2));
                     if ($temp1 == "") {
                         $temp1 = "NULL";
                     }
                     while ($index1 + 1 < count($sentence_array)) {
                         if ($sentence_array[$index1 + 1] == "" || $sentence_array[$index1 + 1][0] != "+") {
                             $temp2 = $sentence_array[$index1 + 1];
                             $index1++;
                             break;
                         }
                         //if
                         $index1++;
                     }
                     //while
                     continue;
                 } else {
                     if (($c = $this->strpos_function($temp2, $temp1)) != -1 && $c == 0) {
                         $found = 1;
                         if (strlen($temp1) == strlen($temp2) && $sent_ind == count($sentences)) {
                             for ($u = 0; $u < $start; $u++) {
                                 //return key and val
                                 prev($this->alignment_table);
                             }
                             $d = current($this->alignment_table);
                             $key_value = $key_value . $d;
                             $value = $value . key($this->alignment_table);
                             for ($u = 0; $u < $start - 1; $u++) {
                                 //return key and val
                                 next($this->alignment_table);
                                 $key_value = $key_value . current($this->alignment_table);
                                 $value = $value . current($this->alignment_table);
                             }
                             $array = array($key_value, $value, $dummy);
                             $start = 0;
                             return $array;
                         }
                         $temp2 = substr($temp2, strlen($temp1));
                         if ($sent_ind >= count($sentences)) {
                             $temp1 = "NULL";
                             break;
                         } else {
                             $temp1 = $sentences[$sent_ind];
                             $sent_ind++;
                         }
                     }
                 }
                 if ($found == 0) {
                     $start = 0;
                     $value = "";
                     break;
                 }
             }
             //while
             if ($found == 0) {
                 $temp1 = "NULL";
                 $temp2 = "NULL";
                 $index1 = $index;
             }
         }
         //foreach
     }
     //else
     $array = array("", "NULL");
     return $array;
 }
Exemplo n.º 7
0
 /**
  * Run the supplied text through a sentence segmentor.
  *
  * A new Multilingual_Aligner_SentenceSegmentor is created on every call and
  * the value produced by its segment() method is handed back untouched.
  *
  * @param mixed $text Text passed as-is to the segmentor.
  *
  * @return mixed Whatever Multilingual_Aligner_SentenceSegmentor::segment() returns.
  */
 public function _segment_into_sentences($text)
 {
     $sentenceSplitter = new Multilingual_Aligner_SentenceSegmentor();

     return $sentenceSplitter->segment($text);
 }