コード例 #1
0
// Report and display every error level while the script runs.
error_reporting(E_ALL);
ini_set('display_errors', 1);

/**
 * @see https://github.com/FerreroJeremy/DBNary-PHP-Interface
 */
require_once 'DBNaryInterface.php';
/**
 * @see https://github.com/neitanod/forceutf8
 */
require_once 'Encoding.php';

// Set up the DBNary client (french -> english) and the UTF-8 converter.
$db = new DBNaryInterface();
$encode = new Encoding();
$db->setLanguageFrom('french');
$db->setLanguageTo('english');
$db->connect();

// For each line of the input dictionary, look up the translations of its
// first word and write one "word @ translation" line per result.
$words = extractLinesFromFile('dicos/dico_295065.txt');
foreach ($words as $rawLine) {
    // Normalise once: trim, convert to UTF-8, keep the first word, trim again.
    $utf8Line = $encode->toUTF8(trim($rawLine));
    $headword = trim(getFirstWord(trim($utf8Line)));

    $db->getTranslations($headword);
    $availableTranslations = $db->getResultInList();
    if (empty($availableTranslations)) {
        // Nothing found for this word: move on to the next line.
        continue;
    }

    foreach ($availableTranslations as $translation) {
        $entry = $headword . ' @ ' . trim($translation) . chr(10);
        writeInFile('dico_from_dbnary.txt', $entry);
    }
}
/* * ***********************
 *      FUNCTIONS
 * *********************** */
/**
コード例 #2
0
/**
 * Jérémy Ferrero<br/>
 * Compilatio <br/>
 * GETALP - Laboratory of Informatics of Grenoble <br/>
 *
 * This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.
 * For more information, see http://creativecommons.org/licenses/by-sa/4.0/
 */
// Raise the execution time limit and display every error level.
set_time_limit(100000);
ini_set('display_errors', 1);
error_reporting(E_ALL);

// The two lexicon files are paired by line index: entry N of the first file
// is joined with entry N of the second one.
$pathLang1 = 'lex/fr.txt';
$pathLang2 = 'lex/es.txt';
$contentLang1 = extractLinesFromFile($pathLang1);
$contentLang2 = extractLinesFromFile($pathLang2);
$dico = array();
// NOTE(review): $lex is never read in the visible part of the script; kept in
// case code outside this excerpt relies on it.
$lex = array();
foreach ($contentLang1 as $keyLang1 => $wordLang1) {
    $sourceWord = trim($wordLang1);
    // The second file may have fewer lines than the first one: treat a missing
    // line as empty instead of reading an undefined offset.
    $targetWord = isset($contentLang2[$keyLang1]) ? trim($contentLang2[$keyLang1]) : '';
    // Compare against '' explicitly: empty() would also discard the entry "0".
    if ($sourceWord !== '' && $targetWord !== '') {
        $dico[] = $sourceWord . ' @ ' . $targetWord;
    }
}
// Remove duplicate pairs, then order them by value.
$dico = array_unique($dico);
asort($dico);
// One pair per line, without a trailing newline.
$string = implode(chr(10), $dico);
writeFile('dico/dico_fr_es.txt', $string);
/* * ***********************
コード例 #3
0
// Running index of the next aligned triple stored in $alignment.
$count = 0;
// NOTE(review): $limit is not read anywhere in the visible part of the script.
$limit = -1;
$lang1 = 'en';
$lang2 = 'fr';
$lang3 = 'es';

// The files of the first language drive the iteration. glob() returns false on
// error, which must not reach asort()/foreach, so fall back to an empty list.
$filesLang1 = glob($inputDir . $lang1 . '/*');
if ($filesLang1 === false) {
    $filesLang1 = array();
}
asort($filesLang1);

foreach ($filesLang1 as $fileLang1) {
    $path_parts = pathinfo($fileLang1);
    // Drop the last three characters of the file name (presumably the
    // "-<lang>" suffix, since it is re-appended per language just below).
    $fileName = substr($path_parts['filename'], 0, -3);
    $fileLang1 = $inputDir . $lang1 . '/' . $fileName . '-' . $lang1 . '.txt';
    $fileLang2 = $inputDir . $lang2 . '/' . $fileName . '-' . $lang2 . '.txt';
    $fileLang3 = $inputDir . $lang3 . '/' . $fileName . '-' . $lang3 . '.txt';
    $linesOfLang1 = extractLinesFromFile($fileLang1);
    $linesOfLang2 = extractLinesFromFile($fileLang2);
    $linesOfLang3 = extractLinesFromFile($fileLang3);

    // A dedicated index name avoids shadowing any outer loop variable.
    foreach (array_keys($linesOfLang1) as $lineIndex) {
        // Keep a line only when all three languages have a non-empty value at
        // this index; empty() also covers indexes missing from a shorter file.
        if (empty($linesOfLang1[$lineIndex]) || empty($linesOfLang2[$lineIndex]) || empty($linesOfLang3[$lineIndex])) {
            continue;
        }

        $alignment[$count] = array(
            'filename' => $fileName,
            'en' => mb_strtolower($linesOfLang1[$lineIndex]),
            'fr' => mb_strtolower($linesOfLang2[$lineIndex]),
            'es' => mb_strtolower($linesOfLang3[$lineIndex]),
        );

        // Write the triple to the verification file, followed by a blank line.
        writeFile('verif.txt', $fileName . chr(10));
        writeFile('verif.txt', $alignment[$count]['en'] . chr(10));
        writeFile('verif.txt', $alignment[$count]['fr'] . chr(10));
        writeFile('verif.txt', $alignment[$count]['es'] . chr(10));
        writeFile('verif.txt', chr(10));
        $count++;
    }
}
コード例 #4
0
 *
 * This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.
 * For more information, see http://creativecommons.org/licenses/by-sa/4.0/
 */
// Raise the execution time limit and display every error level.
set_time_limit(100000);
ini_set('display_errors', 1);
error_reporting(E_ALL);
// NOTE(review): $language and $fileNumber are not read in the visible part of
// this script; their use is presumably further down — confirm.
$language = 'lang';
$dir = './input_directory_path/';
$fileNumber = 0;
// Walk every entry of $dir whose name does not start with a dot.
if (is_dir($dir)) {
    if ($dh = opendir($dir)) {
        while (($file = readdir($dh)) !== false) {
            // Skips ".", ".." and any other dot-prefixed entry.
            if (substr($file, 0, 1) != '.') {
                $name = basename($file);
                // $content is counted and indexed from 0 below, so it is used
                // as a list (presumably one item per line of the file).
                $content = extractLinesFromFile($dir . $name);
                // Pop the last element of the array if necessary (new line for example).
                // array_pop($content);
                // Split every item of $content; index 1 of each result is
                // compared against tag names further down.
                // NOTE(review): $posTable is not reset per file in the visible
                // code, so rows from a previous, longer file would remain and
                // be included in count($posTable) — confirm.
                for ($i = 0, $j = count($content); $i < $j; $i++) {
                    $posTable[$i] = splitStringBySpacesWithCaseRespect($content[$i]);
                }
                $i = 0;
                $treeSize = count($posTable);
                // $str accumulates the current chunk; $tree collects finished chunks.
                $str = '';
                $tree = array();
                // Tags for which the current chunk is kept open (see the test below).
                $nominalChunkTags = array('NN', 'NNS', 'NP', 'DT', 'JJ', 'IN', 'TO', 'NC', 'ADJ', 'NOM', 'NAM', 'DET:ART', 'DET:POS', 'PRP', 'PRP:det', 'PREP', 'ART', 'DET');
                // Stop one row early so that $posTable[$i + 1] always exists.
                while ($i < $treeSize - 1) {
                    $leaf = $posTable[$i];
                    $followingLeaf = $posTable[$i + 1];
                    // Close the current chunk when the tag is not in the list
                    // above or the chunk's word count has reached 10.
                    // NOTE(review): in_array() is called without strict comparison.
                    if (!in_array($leaf[1], $nominalChunkTags) || wordCountAccordingMicrosoftWordApproach($str) >= 10) {
                        array_push($tree, trim($str));