die("Usage {$argv[0]} TEXT_FILE [ENCODING] [FILE_ENCODING]" . PHP_EOL);
} // closes the argument-count guard that opens above this chunk

// ---------------------------------------------------------------------------
// Command-line arguments: TEXT_FILE is mandatory, both encodings fall back to
// 'utf-8' when they are not supplied.
// ---------------------------------------------------------------------------
$cwd = getcwd();
$text_file = $argv[1];

$encoding = 'utf-8';
if ($argc > 2) {
    $encoding = $argv[2];
}

$file_encoding = 'utf-8';
if ($argc > 3) {
    $file_encoding = $argv[3];
}

$lang = 'rus';

// ---------------------------------------------------------------------------
// Library bootstrap: the PHPMORPHY_VER environment variable selects which
// entry point gets loaded. Only the exact string "0.2" selects the
// MORPHY_2X_DIR tree; any other value (or an unset variable) loads
// phpMorphy.php through the include path.
// ---------------------------------------------------------------------------
$morphy_ver = getenv('PHPMORPHY_VER');

if ($morphy_ver === "0.2") {
    require_once MORPHY_2X_DIR . '/src/common.php';
} else {
    // Put the sibling src/ directory in front of the existing include path.
    set_include_path(
        implode(PATH_SEPARATOR, array(__DIR__ . '/../src/', get_include_path()))
    );
    require 'phpMorphy.php';
}

// Dictionaries are looked up in a directory named after the encoding.
$dict_dir = PHPMORPHY_DIR . '/../dicts/' . $encoding;

// ---------------------------------------------------------------------------
// Load the word list and report its size.
// ---------------------------------------------------------------------------
$words = load_words($text_file, REMOVE_DUPLICATES, $encoding, $file_encoding);

echo "Total words ", (REMOVE_DUPLICATES ? "(unique)" : ''), " = ", count($words), PHP_EOL;

// ---------------------------------------------------------------------------
// Benchmarks. Calls that are commented out are disabled in the original
// script and are kept here for reference.
// ---------------------------------------------------------------------------
//print_memory_usage();

//bench_mystem($words, $encoding, PATH_TO_MYSTEM);
bench_porter($words, $encoding);
//bench_enchant($words);
//bench_pspell($words);

//print_memory_usage();

// Dictionary benchmark: one run per storage backend, in FILE, SHM, MEM order.
foreach (array(PHPMORPHY_STORAGE_FILE, PHPMORPHY_STORAGE_SHM, PHPMORPHY_STORAGE_MEM) as $bench_storage) {
    bench_morphy_dict($words, $encoding, $dict_dir, $lang, $bench_storage);
}

// Full benchmark: FILE and SHM backends, each with the trailing boolean
// argument first false and then true.
foreach (array(PHPMORPHY_STORAGE_FILE, PHPMORPHY_STORAGE_SHM) as $bench_storage) {
    foreach (array(false, true) as $bench_flag) {
        bench_morphy($words, $encoding, $dict_dir, $lang, $bench_storage, $bench_flag);
    }
}

// Do not leave the loop helpers behind in the global scope.
unset($bench_storage, $bench_flag);

//print_memory_usage();
// Word category to load. Defaults to "Random", which the checks below treat
// as "load every category"; a `wordcat` query parameter overrides it.
$wcat = "Random";
if (isset($_GET["wordcat"])) {
    // NOTE(review): the request value is used as-is; in the code visible here
    // it is only compared against fixed category names.
    $wcat = $_GET["wordcat"];
}

// Load the word files: each file is read when its category was requested
// explicitly or when the selection is "Random".
if ($wcat == "Animals" || $wcat == "Random") {
    load_words("animals.txt", "Animals");
}
if ($wcat == "Things" || $wcat == "Random") {
    load_words("things.txt", "Things");
}
if ($wcat == "Colors" || $wcat == "Random") {
    load_words("colors.txt", "Colors");
}
if ($wcat == "Verbs" || $wcat == "Random") {
    load_words("verbs.txt", "Verbs");
}

/**
 * Reads a word file and appends its whitespace-separated words to the global
 * word list.
 *
 * Words are stored in the global $wordlist at index $wc, and the global
 * counter $wc is incremented after each stored word. A word is stored only
 * when the global $wcat equals $catname or is "Random".
 *
 * NOTE(review): the remainder of this function lies beyond the visible chunk,
 * so its return value and any cleanup (e.g. closing the file) cannot be
 * confirmed from here.
 *
 * @param string $fname   Path of the word file; opened for reading.
 * @param string $catname Category name that the file's words belong to.
 */
function load_words($fname, $catname) {
    global $wordlist;
    global $wcat, $wc;

    // Abort the script if the file cannot be opened.
    // NOTE(review): $php_errormsg is not declared in this function; it is only
    // populated when the track_errors ini setting is on, and the feature was
    // deprecated in PHP 7.2 and removed in PHP 8.0 — confirm the target runtime.
    $file = fopen($fname, 'r') or die($php_errormsg);

    while (!feof($file)) {
        // The assignment doubles as a truthiness test, so a failed read (false)
        // is skipped — and so is any line whose whole content is falsy, such as
        // a final "0" with no trailing newline.
        if ($line = fgets($file)) {
            // Split on runs of whitespace, dropping empty pieces.
            $words = preg_split('/\\s+/', $line, -1, PREG_SPLIT_NO_EMPTY);

            // Walk over every word found on this line.
            for ($i = 0; $i < count($words); $i++) {
                // Keep the word only if its category is selected.
                if ($wcat == $catname || $wcat == "Random") {
                    $wordlist[$wc] = $words[$i];
                    $wc++;