Esempio n. 1
0
<?php

require_once 'lemmatizer.php';
// Request handling for the demo page: when a valid `word` query parameter is
// supplied, lemmatize it and record the result, the elapsed time and any
// error reported by the lemmatizer. All three stay null when there is no
// (valid) input.
$time = null;
$error = null;
$result = null;

// A word is one run of ASCII letters, optionally followed by a single hyphen
// and more letters.
//  - is_string(): `?word[]=x` arrives as an array, which preg_match() refuses
//    (TypeError on PHP 8, warning on PHP 7); treat it as "no input" instead.
//  - D modifier: makes `$` match only at the very end of the subject, so a
//    value with a trailing newline is no longer accepted.
if (
    isset($_GET['word'])
    && is_string($_GET['word'])
    && preg_match('/^[a-zA-Z]+-?[a-zA-Z]*$/D', $_GET['word'])
) {
    $subject = strtolower($_GET['word']);

    // The timed section covers both constructing the lemmatizer and the
    // lemmatization call itself.
    $start = microtime(true);
    $lemmatizer = new Lemmatizer();
    $result = $lemmatizer->eat($subject);
    $time = round(microtime(true) - $start, 5);

    // Copy the error only when the lemmatizer reported a truthy one.
    if ($lemmatizer->error) {
        $error = $lemmatizer->error;
    }
    unset($lemmatizer);
}
?>

<!doctype html>
<html>

<head>
    <title>Indonesian Lemmatizer</title>
    <link rel="stylesheet" type="text/css" href="css/normalize.css">
    <link rel="stylesheet" type="text/css" href="css/global.css">
</head>

<body>
    <div class="list" id="top">&nbsp;</div>
Esempio n. 2
0
<?php

require 'lemmatizer-debug.php';
/**

    TESTING / DEBUG LINES

**/
// Debug driver: lemmatize either the built-in sample word or a valid `word`
// query parameter, printing a small input form and the chosen input first.
$subject = "pertanggungjawabannyalah";
$lemmatizer = new Lemmatizer();

// Override the sample word only with a well-formed request value: one run of
// ASCII letters, optionally followed by a single hyphen and more letters.
//  - is_string(): `?word[]=x` arrives as an array, which preg_match() refuses
//    (TypeError on PHP 8, warning on PHP 7); fall back to the sample instead.
//  - D modifier: makes `$` match only at the very end of the subject, so a
//    value with a trailing newline is no longer accepted.
if (
    isset($_GET['word'])
    && is_string($_GET['word'])
    && preg_match('/^[a-zA-Z]+-?[a-zA-Z]*$/D', $_GET['word'])
) {
    $subject = strtolower($_GET['word']);
}

echo "<form action='#' method='GET'><input type='text' name='word' /><input type='submit' value='Lemmatize' /></form><hr />";
// $subject is either the hard-coded sample or letters/hyphen only, so it can
// be written into the page without further escaping.
echo "Input: <strong>{$subject}</strong><br />";

// $start is captured immediately before the lemmatization call.
$start = microtime(true);
$result = $lemmatizer->eat($subject);
// Affixes the lemmatizer reports as removed, keyed by affix kind.
$removed = $lemmatizer->getRemoved();
echo "<br /><br />";
foreach ($removed as $key => $affix) {
    if ($key == "derivational_prefix" && $affix != '') {
        echo "<br />Removed {$key} : ";
        foreach ($lemmatizer->complex_prefix_tracker as $array) {
            $value = reset($array);
            echo key($array) . ",";
            if ($value) {
                echo "  added: {$value}";
            }
        }
    } else {
        if ($affix != '') {
Esempio n. 3
0
// Batch run over the `word` table: raise PHP's execution time limit to
// 3600 seconds before doing anything else.
ini_set('max_execution_time', 3600);

// Unique words only?
$unique = true;

require "lemmatizer.php";

// Timestamp taken once the lemmatizer code has been loaded.
$ready = microtime(true);

// Accumulators for the batched INSERT assembled by the processing loop.
$data_count = 0;
$query_string = "";

$db = new PDO("mysql:host=localhost;dbname=lemmatizer", "root", "");

// Read every word; when $unique is set, GROUP BY collapses rows that share
// the same `value`.
$select = "SELECT * FROM word ";
if ($unique) {
    $select .= "GROUP BY `value`";
}
$q = $db->query($select);
$rows = $q->fetchAll();

// Index of the final row (-1 when the table is empty).
$last = count($rows) - 1;
foreach ($rows as $key => $row) {
    // Per-row work: lemmatize one word, time it, and append one value tuple
    // to the pending multi-row INSERT.
    $source = $row['source'];
    $input = $row['value'];
    $error = "";

    // Start the clock for this word. $start was never assigned in the
    // visible script, so the recorded process_time was computed from an
    // undefined variable instead of a per-word duration.
    $start = microtime(true);
    $obj = new Lemmatizer();
    $output = $obj->eat($input);
    if ($obj->error) {
        $error = $obj->error;
    }
    $diff = round(microtime(true) - $start, 5);
    unset($obj);

    // First tuple of a batch: (re)start the statement with its column list.
    if ($data_count == 0) {
        $query_string = "INSERT INTO result (`input`, `output`, `process_time`, `issue`, `source`) VALUES ";
    }

    // PDO::quote() adds the surrounding quotes and escapes the content, so a
    // word or error message containing an apostrophe can no longer break
    // (or inject into) the statement. The tuple keeps its trailing comma,
    // as before.
    $query_string .= "("
        . $db->quote((string) $input) . ", "
        . $db->quote((string) $output) . ", "
        . $db->quote((string) $diff) . ", "
        . $db->quote((string) $error) . ", "
        . $db->quote((string) $source)
        . "),";
    $data_count++;
    if ($data_count == 750 || $key == $last) {
        var_dump($query_string);
        $query_string = preg_replace('/,$/', ';', $query_string);
        $db->exec($query_string);