PHP StanfordNLP\POSTagger примеры использования

Язык программирования: PHP

Класс/Тип: StanfordNLP\POSTagger

Примеров на hotexamples.com: 3

PHP StanfordNLP\POSTagger - 3 примера найдено. Это лучшие примеры PHP кода для StanfordNLP\POSTagger, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

tag(2)

batchTag(1)

Пример #1

Показать файл

Файл: tagger.php Проект: anninireland/sandbox

/**
 * Part-of-speech tags a text with the Stanford POS tagger.
 *
 * The text is split on single spaces and the resulting word list is passed to
 * \StanfordNLP\POSTagger::tag(). The tagger model and JAR are looked up in the
 * PHP-Stanford-NLP directory that sits next to this file.
 *
 * @param string $the_text Text to tag.
 * @return mixed Whatever \StanfordNLP\POSTagger::tag() returns for the word list.
 */
function tag_the_content($the_text)
{
    // Directory of this file; the tagger library lives in a sub-directory of it.
    $dir = dirname(__FILE__);
    // include_once (not include) so that calling this function more than once
    // per request does not load the autoloader file a second time.
    include_once $dir . '/PHP-Stanford-NLP/autoload.php';
    // creates tagger
    $pos = new \StanfordNLP\POSTagger($dir . '/PHP-Stanford-NLP/stanford-postagger-2015-04-20/models/english-left3words-distsim.tagger', $dir . '/PHP-Stanford-NLP/stanford-postagger-2015-04-20/stanford-postagger.jar');
    // Author's note: in production this is meant to tag get_the_content()
    // instead of the $the_text argument.
    return $pos->tag(explode(' ', $the_text));
}

Пример #2

Показать файл

Файл: aiassig1.php Проект: jsnolan/voicerecognition

<input type="submit" onclick="setValue();">
</form>

<script>
// Copies the recognized speech text into the form's "my_text" field and then
// submits the form named "form". `final_transcript` is a global that is set
// outside this snippet.
function setValue(){
	var textField = document.form.my_text;
	textField.value = final_transcript;
	var targetForm = document.forms["form"];
	targetForm.submit();
}
</script>

<?php 
// The sentence submitted through the form's "my_text" field.
// NOTE(review): read without an isset() check - confirm the field is always posted.
$mySentence = $_POST['my_text'];
// The topic of the query; filled in once the sentence has been parsed.
$query = "";
// Part-of-speech tagger.
$pos = new \StanfordNLP\POSTagger('./models/english-left3words-distsim.tagger', './stanford-postagger.jar');
// Named-entity tagger, used for tagging the topic as either a person,
// organization or location.
$ner = new \StanfordNLP\NERTagger('./classifiers/english.all.3class.distsim.crf.ser.gz', './stanford-ner.jar');
// cURL handle; its use is outside the visible part of this snippet.
$curl = curl_init();
// Used to navigate through loops.
$i = 0;
// Whether the stated sentence is valid; defaults to invalid.
$valid = false;
// EXTRACT SPOKEN WORDS
if (substr($mySentence, 0, 8) === "What is ") {
    $valid = true;
    $position = strpos($mySentence, "is ");
    $query = substr($mySentence, $position + strlen("is "));
    $query = ucwords($query);
    //echo $query;
    $_SESSION["query"] = $query;

Пример #3

Показать файл

Файл: TextParser.php Проект: spotzi/Geotagger

 /**
  * Extracts candidate place terms (nouns, NER locations and postal codes) from a text.
  *
  * Steps, each timed into $this->times:
  *  1. Strip a fixed list of markup fragments and split the text into sentences
  *     (on '.') and words (on ' ').
  *  2. Run the Stanford POS and NER taggers and check that both produced the
  *     same token sequence.
  *  3. Walk the tokens and add every noun/location token, plus every phrase of
  *     consecutive noun/adjective/number/location tokens that contains a noun
  *     or location, to a TermArray.
  *  4. Filter the terms (locations only, else nouns after prepositions, else
  *     all nouns) and record the choice in $this->termTypeUsed.
  *  5. Find Canadian, US and Dutch postal codes with regular expressions and
  *     flag or add them as terms.
  *
  * Diagnostics and errors are written with echo.
  *
  * @param string $text Text to analyse.
  * @return TermArray|false The collected terms, or false when a tagger returned
  *                         no result or the two taggers' tokens do not line up.
  */
 function run($text)
 {
     // The following appear in some Wikipedia texts and mess up the parsing:
     $textsToRemove = ["<br>", "</table>", "</dl>", "</ref>", "<ns>", "</ns>", "<id>", "</id>", "<small>", "<revision>", "<comment>", "</comment>", "<model>", "</model>", "<parentid>", "</parentid>"];
     $text = str_replace($textsToRemove, "", $text);
     // needed for absolute paths to Stanford models
     $currentDir = getcwd();
     $termArray = new TermArray();
     if (!function_exists("splitIntoWords")) {
         // The POS and NER taggers need an array of arrays, where each sentence is
         // its own array.
         function splitIntoWords($sentence)
         {
             return explode(' ', $sentence);
         }
     }
     $text_arrays = array_map("splitIntoWords", explode('.', $text));

     // Send the text to the POS tagger:
     $pos = new \StanfordNLP\POSTagger($currentDir . '/stanford-postagger-2015-04-20/models/english-left3words-distsim.tagger', $currentDir . '/stanford-postagger-2015-04-20/stanford-postagger.jar');
     $startTime = microtime(true);
     $posBatches = $pos->batchTag($text_arrays);
     // Only element [0] of the batch result is used. The isset() guard turns an
     // empty result into null (handled below) instead of an undefined-offset warning.
     // NOTE(review): confirm that element [0] covers all sentences and not just the first.
     $resultPOS = isset($posBatches[0]) ? $posBatches[0] : null;
     $this->times["Run the POS tagger"] = microtime(true) - $startTime;
     if (printOutput()) {
         echo "<br>POS results:<br>";
         var_dump($resultPOS);
         echo "<br>";
     }
     if (!$resultPOS) {
         echo "<br>ERROR: POS tagging failed<br>";
         return false;
     }

     // Send the text to the NER:
     $ner = new \StanfordNLP\NERTagger($currentDir . '/stanford-ner-2015-04-20/classifiers/english.all.3class.distsim.crf.ser.gz', $currentDir . '/stanford-ner-2015-04-20/stanford-ner.jar');
     $startTime = microtime(true);
     $nerBatches = $ner->batchTag($text_arrays);
     $resultNER = isset($nerBatches[0]) ? $nerBatches[0] : null;
     $this->times["Run the NER tagger"] = microtime(true) - $startTime;
     if (printOutput()) {
         echo "<br>NER results:<br>";
         var_dump($resultNER);
         echo "<br><br>";
     }
     if (!$resultNER) {
         echo "<br>ERROR: NER tagging failed<br>";
         return false;
     }

     // Later code assumes that $resultPOS and $resultNER are indexed identically.
     // I have only seen these errors returned when the text contains something like
     // "<br>" which is handled differently by each tagger
     if (count($resultPOS) != count($resultNER)) {
         echo "<br>ERROR: POS and NER tagging are not indexed the same!<br>";
         return false;
     }
     // $words[$i] is the token at position $i; used below to locate postal codes.
     $words = [];
     for ($i = 0, $size = count($resultNER); $i < $size; $i++) {
         if ($resultPOS[$i][0] != $resultNER[$i][0]) {
             echo "<br>ERROR: POS and NER tagging are not indexed the same!<br>";
             return false;
         }
         $words[$i] = $resultNER[$i][0];
     }

     // The next bunch of code loops through the text to find all terms
     $startTime = microtime(true);
     // Number of consecutive noun/adjective/number/location tokens ending at $i.
     $currentStreak = 0;
     $streakContainsLocation = false;
     $streakContainsNoun = false;
     $isAfterPreposition = false;
     $isAfterConjunction = false;
     for ($i = 0, $size = count($resultPOS); $i < $size; $i++) {
         $word = $resultPOS[$i][0];
         $posTag = $resultPOS[$i][1];
         $nerTag = $resultNER[$i][1];

         $isNoun = strncmp($posTag, "NN", 2) === 0;
         // number or adjective (so "first avenue" would catch the first)
         $isAdjectiveOrNumber = (strcmp($posTag, "CD") === 0 || strcmp($posTag, "JJ") === 0);
         $isLocation = strcmp($nerTag, "LOCATION") === 0;

         if ($isNoun || $isAdjectiveOrNumber || $isLocation) {
             $currentStreak++;
             if ($isLocation) {
                 $streakContainsLocation = true;
             }
             if ($isNoun) {
                 $streakContainsNoun = true;
             }
         } else {
             // Any other token ends the current streak.
             $streakContainsLocation = false;
             $streakContainsNoun = false;
             $currentStreak = 0;
             $isAfterConjunction = strcmp($posTag, "CC") === 0;
             if (!$isAfterConjunction) {
                 // reset $isAfterPreposition only if this is not after a conjunction
                 // that way a text like "near Waterloo and Guelph" will tag both Waterloo
                 // and Guelph as after a preposition
                 $isAfterPreposition = (strcmp($posTag, "IN") === 0 || strcmp($posTag, "TO") === 0);
                 if (strcmp($word, "for") === 0) {
                     // TODO: make this case insensitive?
                     $isAfterPreposition = false;
                 }
             }
         }

         if ($streakContainsLocation || $streakContainsNoun) {
             // Start with the current token alone, then grow the phrase backwards
             // through the streak one token at a time.
             $phrase = $word;
             $subStreakContainsNoun = $isNoun;
             $subStreakContainsLocation = $isLocation;
             if ($isNoun || $isLocation) {
                 $newTerm = $termArray->addTerm($phrase, $i);
                 $newTerm->isNoun = $isNoun;
                 $newTerm->isLocation = $isLocation;
                 $newTerm->isAfterPreposition = $isAfterPreposition;
             }
             for ($j = 1; $j < $currentStreak; $j++) {
                 $phrase = $resultPOS[$i - $j][0] . ' ' . $phrase;
                 $subStreakContainsNoun = ($subStreakContainsNoun || strncmp($resultPOS[$i - $j][1], "NN", 2) === 0);
                 $subStreakContainsLocation = ($subStreakContainsLocation || strcmp($resultNER[$i - $j][1], "LOCATION") === 0);
                 if ($subStreakContainsLocation || $subStreakContainsNoun) {
                     $newTerm = $termArray->addTerm($phrase, $i - $j);
                     $newTerm->isNoun = $subStreakContainsNoun;
                     $newTerm->isLocation = $subStreakContainsLocation;
                     $newTerm->isAfterPreposition = $isAfterPreposition;
                 }
             }
         }
     }
     $this->times["Loop through text to find locations"] = microtime(true) - $startTime;

     // Now we remove some terms from the array:
     $startTime = microtime(true);
     if ($termArray->terms) {
         if ($termArray->countLocations() > 0) {
             if (printOutput()) {
                 echo "<br>This text contains words tagged as locations, so we will only consider those words.<br>";
             }
             $termArray->removeNouns(false);
             $this->termTypeUsed = "Locations";
         } else {
             if (printOutput()) {
                 echo "<br>This text does not contain words tagged as locations, so we must only use nouns.<br>";
             }
             if ($termArray->countNounsAfterPrepositions() > 0) {
                 if (printOutput()) {
                     echo "Some nouns occured after prepositions, so we will only use those.<br>";
                 }
                 $termArray->removeNouns(true);
                 $this->termTypeUsed = "NounsAfterPrep";
             } else {
                 $this->termTypeUsed = "Nouns";
             }
         }
     } else {
         echo "Warning: no nouns or locations found in text.";
         $this->termTypeUsed = "None";
     }
     $this->times["Filter terms that are found"] = microtime(true) - $startTime;

     // the rest of this code deals with postal codes
     $startTime = microtime(true);
     $CanadaPostCodes = [];
     $USZipCodes = [];
     $DutchPostCodes = [];
     preg_match_all('/\\b[a-zA-Z][0-9][a-zA-Z][\\s]?[0-9][a-zA-Z][0-9]\\b/', $text, $CanadaPostCodes);
     preg_match_all('/\\b[0-9]{5}([\\s\\-][0-9]{4})?\\b/', $text, $USZipCodes);
     preg_match_all('/\\b[0-9]{4}[\\s]?[a-zA-Z]{2}\\b/', $text, $DutchPostCodes);
     $this->times["Find postal codes in the text"] = microtime(true) - $startTime;

     $startTime = microtime(true);
     // Country code => full regex matches, processed in this order (ca, us, nl).
     $postcodesByCountry = [
         "ca" => $CanadaPostCodes[0],
         "us" => $USZipCodes[0],
         "nl" => $DutchPostCodes[0],
     ];
     foreach ($postcodesByCountry as $country => $postcodes) {
         foreach ($postcodes as $postcode) {
             if (isset($termArray->terms[$postcode])) {
                 // if the postcode already got in another way, we don't add it again
                 foreach ($termArray->terms[$postcode] as $term) {
                     $term->isPostcode = true;
                     $term->postcodeCountry = $country;
                 }
                 continue;
             }

             // all post codes have 1 or 2 words
             $postcodeWords = explode(" ", $postcode);
             // Strict matching: tokens are compared as strings, so that e.g.
             // "01234" is not treated as equal to "1234".
             $firstWordPositions = array_keys($words, $postcodeWords[0], true);
             if (count($postcodeWords) == 1) {
                 $positions = $firstWordPositions;
             } else {
                 // Keep every position where the first word is directly followed
                 // by the second word.
                 $positions = [];
                 foreach ($firstWordPositions as $firstWordPosition) {
                     $nextIndex = $firstWordPosition + 1;
                     // isset() guards against the first word being the last token.
                     if (isset($words[$nextIndex]) && $words[$nextIndex] === $postcodeWords[1]) {
                         $positions[] = $firstWordPosition;
                     }
                 }
                 if (!$positions) {
                     // Two-word postcode not found among the tokens: it is still
                     // added, at the placeholder position -10.
                     $positions = [-10];
                 }
             }
             foreach ($positions as $postcodePosition) {
                 $newTerm = $termArray->addTerm($postcode, $postcodePosition);
                 $newTerm->isPostcode = true;
                 $newTerm->postcodeCountry = $country;
             }
         }
     }
     $this->times["Update metadata for post codes"] = microtime(true) - $startTime;
     return $termArray;
 }