// Report the elapsed time of the section that ends just above this point.
// NOTE(review): $stime is assigned before this excerpt begins.
echo "took " . (microtime(true) - $stime) . " seconds";
// NOTE(review): the bare /**/ markers between sections look like toggle points
// for commenting a whole section out - confirm before removing them.
/**/

// --- Section: expand a pattern with an inline macro dictionary, then compare ---
echo "<hr>";
$stime = microtime(true);
$syntax = nanomacro(
    '{*Agricola} {_cum$({he|Agricola} {had arrived})} {_imp$(want) to (say|speak)} {_a few (words|things)}.',
    [
        'cum' => ', when ${0},',
        'a' => '[a[n]|the]',
        'imp' => '(was ${0}ing|${0}ed)',
    ],
    4
);
var_dump($syntax);
// Two candidate answers are checked against the same expanded pattern.
var_dump(compare_syntax($syntax, "Agricola, when he had arrived, wanted to say a few things", ["unescaped" => true]));
var_dump(compare_syntax($syntax, "When he had arrived, Agricola was wanting to say a few words", ["unescaped" => true]));
echo "took " . (microtime(true) - $stime) . " seconds";
/**/

// --- Section: punctuation lexing and normalization on one messy sentence ---
echo "<hr>";
$stime = microtime(true);
// NOTE(review): $OP_LQUOTE / $OP_RQUOTE are defined outside this excerpt.
$sentence = " , When he said {$OP_LQUOTE}Hello?{$OP_RQUOTE}, they were silent. , {$OP_LQUOTE}Hello{$OP_RQUOTE}, he said. {$OP_LQUOTE}What happened?{$OP_RQUOTE}. He repeated, {$OP_LQUOTE}Hello{$OP_RQUOTE}. {$OP_LQUOTE}What happened?{$OP_RQUOTE}";
var_dump($sentence);
var_dump(lexify_punctuation($sentence));
var_dump(normalize_punctuation($sentence));
echo "took " . (microtime(true) - $stime) . " seconds";
/**/

// --- Section: compare_syntax3() with a shared pattern and dictionary ---
// Latin sentence and a transliteration of it, kept verbatim; the English
// pattern below appears to be its translation.
// centuriō, iuvenem cōnspicātus, “hunc agnōscō!” inquit.
// kenturioo, iuvenem koonspikaatus, "hunk agnooskoo!" inquit.
echo "<hr>";
$stime = microtime(true);
$syntax = '{*_a centurion} {_perfactv$(centurion|he)$(caught sight of)$(_a young man)} {said} {_quot$(I recognize (this [man|guy]|him))}.';
$dict = [
    'a' => '[a[n]|the]',
    // Fix: the third alternative used to read "who had ${1}${2}", gluing the two
    // arguments together; the other two alternatives separate them with a space.
    'perfactv' => ', (having ${1} ${2}|(when|once) ${0} had ${1} ${2}|who had ${1} ${2}),',
    'quot' => ', “${0}”,',
];
var_dump(nanomacro($syntax, $dict, 4));

// The same pattern is tried against three differently ordered answers.
$answer = 'the centurion having caught sight of the young man said I recognize him';
var_dump(compare_syntax3($syntax, $answer, $dict));
$answer = 'the centurion said I recognize him having caught sight of the young man';
var_dump(compare_syntax3($syntax, $answer, $dict));
$answer = 'I recognize him said the centurion having caught sight of the young man';
var_dump(compare_syntax3($syntax, $answer, $dict));
/**
 * Expand a macro-bearing syntax pattern, match it against a target string,
 * and tidy the punctuation of whatever the match yields.
 *
 * Steps, in order:
 *   1. Fall back to nano_dfdict() when no dictionary is supplied.
 *   2. Expand $syntax with nanomacro().
 *   3. Hand the expansion to compare_syntax() with the "unescaped" option on.
 *   4. Run a truthy result through normalize_punctuation(); hand a falsy
 *      result back untouched.
 *
 * @param mixed $syntax     Pattern handed to nanomacro().
 * @param mixed $target     Text handed to compare_syntax() as its second argument.
 * @param mixed $dictionary Macro dictionary for nanomacro(); null selects nano_dfdict().
 * @param mixed $matchall   Forwarded as the "matchall" option.
 * @param mixed $distance   Forwarded as the "max_distance" option.
 * @param mixed $lang       Forwarded as the "lang" option.
 * @return mixed normalize_punctuation() of the match when it is truthy,
 *               otherwise the falsy value compare_syntax() returned.
 */
function compare_syntax3($syntax, $target, $dictionary = null, $matchall = false, $distance = 0, $lang = NULL)
{
    $macros = ($dictionary === null) ? nano_dfdict() : $dictionary;

    // NOTE(review): the meaning of the literal 4 is set by nanomacro(), which
    // is defined elsewhere - confirm before changing it.
    $expanded = nanomacro($syntax, $macros, 4);

    $options = [
        "unescaped" => true,
        "matchall" => $matchall,
        "max_distance" => $distance,
        "lang" => $lang,
    ];
    $result = compare_syntax($expanded, $target, $options);

    // A falsy result is returned as-is so callers see exactly what
    // compare_syntax() produced.
    if (!$result) {
        return $result;
    }

    return normalize_punctuation($result);
}
<br><?php } /* closes a block opened before this excerpt */ ?> </ul> <h2>Punctuation Normalization</h2> <p>You might have noticed that some of the exempla in the last section had awful punctuation. Commas everywhere. Well, there's a reason for that, explained in the next section, but we still need to clean it up. Which is what <strong>Punctuation Normalization</strong> does. Instead of going into great detail with how it works, here are some exempla: <ul> <?php
/*
 * Render one list item per hard-coded sample sentence. The samples contain
 * stray commas and periods; each is shown as written and then again after
 * being passed through normalize_punctuation().
 */
foreach (['The centurion said , “Hey there!”,.', 'The centurion , having seen the guard, shouted.', ', Having seen the guard, the centurion shouted.', '“Let us go to Rome!” “We will have fun there”, he said.', '“I do not know, will we?”, the other replied , once he heard this, .'] as $example) { ?> <li><?php
    /*
     * Output order: the raw sample, the literal word becomes, the normalized sample.
     * NOTE(review): echo2() is defined outside this excerpt; presumably it
     * prints its argument into the page - confirm.
     */
    echo2($example); echo "becomes"; echo2(normalize_punctuation($example)); ?> <br><?php } /* end of the sample-sentence loop */ ?> </ul> <p>Ahhhh, doesn't it look like English again? <h2>Natural Language Pattern Matching</h2> <p>A few more rules again (pseudo-BNF this time): <ol>