require 'lemmatizer-debug.php';

/** TESTING / DEBUG LINES **/
// Small debug page: lemmatizes one word (from ?word=... or a built-in default)
// and prints which affixes the Lemmatizer reports as removed.

// Default word used when no valid `word` query parameter is supplied.
$subject = "pertanggungjawabannyalah";
$lemmatizer = new Lemmatizer();

// Only accept ASCII letters with at most one hyphen after the first run of
// letters; anything else keeps the default. Because of this whitelist the
// value echoed into the HTML below cannot contain markup characters.
if (isset($_GET['word']) && preg_match("/^[a-zA-Z]+-?[a-zA-Z]*\$/", $_GET['word'])) {
    $subject = strtolower($_GET['word']);
}

// Input form that submits back to this same page via GET.
echo "<form action='#' method='GET'><input type='text' name='word' /><input type='submit' value='Lemmatize' /></form><hr />";
echo "Input: <strong>{$subject}</strong><br />";

// Timestamp taken just before lemmatization; it is not read again in the
// statements that follow here.
$start = microtime(true);
$result = $lemmatizer->eat($subject);
$removed = $lemmatizer->getRemoved();
echo "<br /><br />";

// Print one "Removed <kind> : ..." line per non-empty entry of $removed.
foreach ($removed as $key => $affix) {
    if ($key == "derivational_prefix" && $affix != '') {
        // Derivational prefixes are listed from complex_prefix_tracker rather
        // than from $affix itself.
        echo "<br />Removed {$key} : ";
        foreach ($lemmatizer->complex_prefix_tracker as $array) {
            // reset() rewinds to the first element and returns its value, so
            // key() below yields that first element's key.
            // NOTE(review): presumably key = removed prefix, value = letters
            // put back after removal - confirm against Lemmatizer.
            $value = reset($array);
            echo key($array) . ",";
            if ($value) {
                echo " added: {$value}";
            }
        }
    } else {
        // Every other kind of affix is printed verbatim when non-empty.
        if ($affix != '') {
            echo "<br />Removed {$key} : " . $affix;
// Batch run: lemmatizes every row of the `word` table and stores the outcome
// in the `result` table using multi-row INSERTs of up to 750 rows each.

// Unique words only?
$unique = true;

require "lemmatizer.php";

$ready = microtime(true);
$data_count = 0;

$db = new PDO("mysql:host=localhost;dbname=lemmatizer", "root", "");
$q = $db->query("SELECT * FROM word " . ($unique ? "GROUP BY `value`" : ""));

// SQL text of the pending batch and the values bound to its placeholders.
$query_string = "";
$query_params = array();

$rows = $q->fetchAll();
$last = count($rows) - 1;

foreach ($rows as $key => $row) {
    $source = $row['source'];
    $input = $row['value'];
    $error = "";

    // A fresh Lemmatizer per word, so no state carries over between words.
    $obj = new Lemmatizer();
    $output = $obj->eat($input);
    if ($obj->error) {
        $error = $obj->error;
    }

    // NOTE(review): $start is not assigned anywhere in this part of the
    // script - presumably it is set by lemmatizer.php or earlier; confirm.
    // As written this is time elapsed since $start, not a per-word duration.
    $diff = round(microtime(true) - $start, 5);
    unset($obj);

    // Start a new batch: reset both the statement text and its parameters.
    if ($data_count == 0) {
        $query_string = "INSERT INTO result (`input`, `output`, `process_time`, `issue`, `source`) VALUES ";
        $query_params = array();
    }

    // Values are bound through placeholders instead of being interpolated
    // into the SQL, so words or error messages containing quotes or
    // backslashes can no longer break (or inject into) the statement.
    // The string casts mirror what string interpolation used to produce.
    $query_string .= "(?, ?, ?, ?, ?),";
    array_push(
        $query_params,
        (string) $input,
        (string) $output,
        (string) $diff,
        (string) $error,
        (string) $source
    );
    $data_count++;

    // Flush after 750 rows, or when the final row has been queued.
    if ($data_count == 750 || $key == $last) {
        var_dump($query_string);

        // Drop the trailing comma left by the last appended value group.
        $query_string = rtrim($query_string, ',');

        // Best-effort like the original exec(): when PDO runs in a
        // non-throwing error mode a failed prepare() returns false.
        $statement = $db->prepare($query_string);
        if ($statement !== false) {
            $statement->execute($query_params);
        }

        $data_count = 0;