ini_set('display_errors', 1);
error_reporting(E_ALL);

/**
 * @see https://github.com/FerreroJeremy/DBNary-PHP-Interface
 */
require_once 'DBNaryInterface.php';

/**
 * @see https://github.com/neitanod/forceutf8
 */
require_once 'Encoding.php';

/*
 * For every line of the input word list, ask DBnary for the translations of
 * the entry and emit one "word @ translation" line per translation found.
 */
$db = new DBNaryInterface();
$encode = new Encoding();

// Translation direction of the lookup.
$db->setLanguageFrom('french');
$db->setLanguageTo('english');
$db->connect();

$words = extractLinesFromFile('dicos/dico_295065.txt');

foreach ($words as $rawLine) {
    // Same pipeline as before: trim, convert to UTF-8, trim, getFirstWord(), trim.
    // trim() is idempotent, so the cleaned value is computed once and reused below.
    // NOTE(review): getFirstWord() is defined elsewhere; presumably it keeps the leading token.
    $word = trim(getFirstWord(trim($encode->toUTF8(trim($rawLine)))));

    $db->getTranslations($word);
    $availableTranslations = $db->getResultInList();

    // Nothing came back for this entry: move on to the next line.
    if (empty($availableTranslations)) {
        continue;
    }

    foreach ($availableTranslations as $translation) {
        writeInFile('dico_from_dbnary.txt', $word . ' @ ' . trim($translation) . "\n");
    }
}

/*
 * ***********************
 * FUNCTIONS
 * ***********************
 */

/**
/**
 * Jérémy Ferrero<br/>
 * Compilatio <br/>
 * GETALP - Laboratory of Informatics of Grenoble <br/>
 *
 * This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.
 * For more information, see http://creativecommons.org/licenses/by-sa/4.0/
 */
set_time_limit(100000);
ini_set('display_errors', 1);
error_reporting(E_ALL);

/*
 * Merge two lexicons that are aligned by index (entry N of the first file
 * pairs with entry N of the second) into a single de-duplicated, sorted
 * dictionary with one "word @ translation" entry per line.
 */
$pathLang1 = 'lex/fr.txt';
$pathLang2 = 'lex/es.txt';

$contentLang1 = extractLinesFromFile($pathLang1);
$contentLang2 = extractLinesFromFile($pathLang2);

$dico = array();
$lex = array(); // Not used by the code visible here; kept in case later code relies on it.

foreach ($contentLang1 as $keyLang1 => $wordLang1) {
    // The second lexicon may hold fewer entries than the first: skip unmatched
    // entries instead of reading an undefined offset.
    if (!isset($contentLang2[$keyLang1])) {
        continue;
    }

    $source = trim($wordLang1);
    $target = trim($contentLang2[$keyLang1]);

    // Compare against '' rather than using empty(): empty('0') is true, which
    // would silently drop a legitimate "0" entry.
    if ($source === '' || $target === '') {
        continue;
    }

    $dico[] = $source . ' @ ' . $target;
}

$dico = array_unique($dico);
asort($dico);

// Every entry is already trimmed on both sides, so joining with a newline
// yields the final content without a trailing line break.
writeFile('dico/dico_fr_es.txt', implode(chr(10), $dico));

/*
 * ***********************
/*
 * Build $alignment: one record per line index at which the English, French
 * and Spanish versions of the same document all have a non-empty line.
 * Each record is also dumped to verif.txt.
 * $inputDir is expected to be defined earlier in the file (not visible here).
 */
$count = 0;
$limit = -1;

$lang1 = 'en';
$lang2 = 'fr';
$lang3 = 'es';

// The files of the first language drive the iteration.
$filesLang1 = glob($inputDir . $lang1 . '/*');
asort($filesLang1);

foreach ($filesLang1 as $fileLang1) {
    // Drop the last three characters of the base name, then rebuild the
    // three sibling paths as "<name>-<lang>.txt".
    $fileName = substr(pathinfo($fileLang1, PATHINFO_FILENAME), 0, -3);

    $fileLang1 = $inputDir . $lang1 . '/' . $fileName . '-' . $lang1 . '.txt';
    $fileLang2 = $inputDir . $lang2 . '/' . $fileName . '-' . $lang2 . '.txt';
    $fileLang3 = $inputDir . $lang3 . '/' . $fileName . '-' . $lang3 . '.txt';

    $linesOfLang1 = extractLinesFromFile($fileLang1);
    $linesOfLang2 = extractLinesFromFile($fileLang2);
    $linesOfLang3 = extractLinesFromFile($fileLang3);

    foreach ($linesOfLang1 as $lineIndex => $lineLang1) {
        // Keep an index only when all three languages have a non-empty line there.
        if (empty($lineLang1) || empty($linesOfLang2[$lineIndex]) || empty($linesOfLang3[$lineIndex])) {
            continue;
        }

        $entry = array(
            'filename' => $fileName,
            'en' => mb_strtolower($lineLang1),
            'fr' => mb_strtolower($linesOfLang2[$lineIndex]),
            'es' => mb_strtolower($linesOfLang3[$lineIndex]),
        );
        $alignment[$count] = $entry;

        // Verification dump: file name, then the three lines, then a blank line.
        foreach ($entry as $field) {
            writeFile('verif.txt', $field . chr(10));
        }
        writeFile('verif.txt', chr(10));

        $count++;
    }
}
* * This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License. * For more information, see http://creativecommons.org/licenses/by-sa/4.0/ */ set_time_limit(100000); ini_set('display_errors', 1); error_reporting(E_ALL); $language = 'lang'; $dir = './input_directory_path/'; $fileNumber = 0; if (is_dir($dir)) { if ($dh = opendir($dir)) { while (($file = readdir($dh)) !== false) { if (substr($file, 0, 1) != '.') { $name = basename($file); $content = extractLinesFromFile($dir . $name); // Pop the last element of the array if necessary (new line for example). // array_pop($content); for ($i = 0, $j = count($content); $i < $j; $i++) { $posTable[$i] = splitStringBySpacesWithCaseRespect($content[$i]); } $i = 0; $treeSize = count($posTable); $str = ''; $tree = array(); $nominalChunkTags = array('NN', 'NNS', 'NP', 'DT', 'JJ', 'IN', 'TO', 'NC', 'ADJ', 'NOM', 'NAM', 'DET:ART', 'DET:POS', 'PRP', 'PRP:det', 'PREP', 'ART', 'DET'); while ($i < $treeSize - 1) { $leaf = $posTable[$i]; $followingLeaf = $posTable[$i + 1]; if (!in_array($leaf[1], $nominalChunkTags) || wordCountAccordingMicrosoftWordApproach($str) >= 10) { array_push($tree, trim($str));