Exemplos de TermArray em PHP

Linguagem de programação: PHP

Classe / Tipo: TermArray

Exemplos em hotexamples.com: 2

TermArray em PHP - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de TermArray em PHP extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir Ocultar

addTerm(2)

countLocations(1)

countNounsAfterPrepositions(1)

getAllTerms(1)

getAlternateWeights(1)

getWeights(1)

removeNouns(1)

weightHelper(1)

Métodos Frequentes

addTerm (2)

countLocations (1)

countNounsAfterPrepositions (1)

getAllTerms (1)

getAlternateWeights (1)

getWeights (1)

removeNouns (1)

weightHelper (1)

Relacionados

Model_videos

Kendo\Dataviz\UI\SparklineValueAxisItem

Configuration

SellHostedData

pingWeblogs

wiki_include

update0782to080

FormAction

userstatus_drop_down

forum_get_default

Relacionados em outras linguagens

UploadHelper (C#)

OrderBiz (C#)

opal_init_util (C++)

pcap_compile (C++)

GetAuth (Go)

FindFilePath (Go)

EmbeddedObjectContainer (Java)

WaterfallData (Java)

ShowcaseImage (Python)

_build_global (Python)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: TestTerms.php Projeto: spotzi/Geotagger

// Scenario: the words "New", "York", "City" plus the two-word phrases built from them.
// Each fixture is [term text, second addTerm() argument]; the second value is
// presumably the index of the term's first word -- confirm against TermArray::addTerm().
$termArray = new TermArray();
$firstScenario = [
    ["New", 0],
    ["York", 1],
    ["City", 2],
    ["New York", 0],
    ["York City", 1],
];
foreach ($firstScenario as $fixturePair) {
    $termArray->addTerm($fixturePair[0], $fixturePair[1]);
}

$weightHelperResult = $termArray->weightHelper($termArray->getAllTerms());
echo arrayToString($weightHelperResult);
// Disabled in the original script: var_dump($termArray->findGroups());
echo " Weights: ";
var_dump($termArray->getWeights());
echo " Alternate weights: ";
var_dump($termArray->getAlternateWeights());

// Separator, then the header of the next scenario.
echo " ", "Test 10: The, New, York, City, New York, York City, New York City, The New, The New York, The New York City Expected: Actual: ";

// Test 10: every contiguous phrase of "The New York City" that the header lists.
$termArray = new TermArray();
$tenthScenario = [
    ["The", 0],
    ["New", 1],
    ["York", 2],
    ["City", 3],
    ["The New", 0],
    ["New York", 1],
    ["York City", 2],
    ["New York City", 1],
    ["The New York", 0],
    ["The New York City", 0],
];
foreach ($tenthScenario as $fixturePair) {
    $termArray->addTerm($fixturePair[0], $fixturePair[1]);
}

$weightHelperResult = $termArray->weightHelper($termArray->getAllTerms());
echo arrayToString($weightHelperResult);
echo " Weights: ";
var_dump($termArray->getWeights());
echo " ";

Exemplo n.º 2

0

Exibir arquivo

Arquivo: TextParser.php Projeto: spotzi/Geotagger

/**
 * Parse a text and collect the terms (nouns, locations, postal codes) found in it.
 *
 * Steps, each timed into $this->times:
 *  1. strip some markup fragments and split the text into sentences / words;
 *  2. run the Stanford POS tagger and NER tagger over the same word arrays;
 *  3. walk the tagged words and register every streak of nouns / adjectives /
 *     numbers / locations (and its sub-phrases) in a TermArray;
 *  4. narrow the terms down to locations, or to nouns after prepositions, and
 *     record the choice in $this->termTypeUsed;
 *  5. flag Canadian, US and Dutch postal codes, adding them as terms if needed.
 *
 * @param string $text Raw text to analyse.
 *
 * @return TermArray|false The collected terms, or false when tagging failed or the
 *                         two taggers disagree on tokenisation (an error is echoed).
 */
function run($text)
{
    // The following appear in some Wikipedia texts and mess up the parsing:
    // NOTE(review): the " " and "" entries look like markup/entities that were lost
    // when this snippet was extracted (as written, " " strips every space) --
    // confirm against the upstream file before relying on this list.
    $textsToRemove = [" ", "</table>", "</dl>", "</ref>", "<ns>", "</ns>", "<id>", "</id>", "", "<revision>", "<comment>", "</comment>", "<model>", "</model>", "<parentid>", "</parentid>"];
    $text = str_replace($textsToRemove, "", $text);

    $currentDir = getcwd(); // needed for absolute paths to Stanford models
    $termArray = new TermArray();

    if (!function_exists("splitIntoWords")) {
        // The POS and NER taggers need an array of arrays, where each sentence is
        // its own array.
        function splitIntoWords($sentence)
        {
            return explode(' ', $sentence);
        }
    }
    $text_arrays = array_map("splitIntoWords", explode('.', $text));

    // Send the text to the POS tagger:
    $pos = new \StanfordNLP\POSTagger(
        $currentDir . '/stanford-postagger-2015-04-20/models/english-left3words-distsim.tagger',
        $currentDir . '/stanford-postagger-2015-04-20/stanford-postagger.jar'
    );
    $startTime = microtime(true);
    $resultPOS = $pos->batchTag($text_arrays)[0];
    $this->times["Run the POS tagger"] = microtime(true) - $startTime;
    if (printOutput()) {
        echo " POS results: ";
        var_dump($resultPOS);
        echo " ";
    }
    if (!$resultPOS) {
        echo " ERROR: POS tagging failed ";
        return false;
    }

    // Send the text to the NER:
    $ner = new \StanfordNLP\NERTagger(
        $currentDir . '/stanford-ner-2015-04-20/classifiers/english.all.3class.distsim.crf.ser.gz',
        $currentDir . '/stanford-ner-2015-04-20/stanford-ner.jar'
    );
    $startTime = microtime(true);
    $resultNER = $ner->batchTag($text_arrays)[0];
    $this->times["Run the NER tagger"] = microtime(true) - $startTime;
    if (printOutput()) {
        echo " NER results: ";
        var_dump($resultNER);
        echo " ";
    }
    if (!$resultNER) {
        echo " ERROR: NER tagging failed ";
        return false;
    }

    // Later code assumes that $resultPOS and $resultNER are indexed identically.
    // I have only seen these errors returned when the text contains something like
    // " " which is handled differently by each tagger
    if (count($resultPOS) != count($resultNER)) {
        echo " ERROR: POS and NER tagging are not indexed the same! ";
        return false;
    }
    $words = [];
    for ($i = 0, $size = count($resultNER); $i < $size; $i++) {
        if ($resultPOS[$i][0] != $resultNER[$i][0]) {
            echo " ERROR: POS and NER tagging are not indexed the same! ";
            return false;
        }
        $words[$i] = $resultNER[$i][0];
    }

    // The next bunch of code loops through the text to find all terms
    $startTime = microtime(true);
    $currentStreak = 0;
    $streakContainsLocation = false;
    $streakContainsNoun = false;
    $isAfterPreposition = false;
    $isAfterConjunction = false;
    for ($i = 0, $size = count($resultPOS); $i < $size; $i++) {
        $posTag = $resultPOS[$i][1];
        $isNoun = strncmp($posTag, "NN", 2) == 0;
        // adjective (so "first avenue" would catch the first)
        $isAdjectiveOrNumber = (strcmp($posTag, "CD") == 0 || strcmp($posTag, "JJ") == 0);
        $isLocation = strcmp($resultNER[$i][1], "LOCATION") == 0;

        if ($isNoun || $isAdjectiveOrNumber || $isLocation) {
            $currentStreak++;
            if ($isLocation) {
                $streakContainsLocation = true;
            }
            if ($isNoun) {
                $streakContainsNoun = true;
            }
        } else {
            // Any other word ends the current streak.
            $streakContainsLocation = false;
            $streakContainsNoun = false;
            $currentStreak = 0;
            $isAfterConjunction = strcmp($posTag, "CC") == 0;
            if (!$isAfterConjunction) {
                // reset $isAfterPreposition only if this is not after a conjunction
                // that way a text like "near Waterloo and Guelph" will tag both Waterloo
                // and Guelph as after a preposition
                $isAfterPreposition = (strcmp($posTag, "IN") == 0 || strcmp($posTag, "TO") == 0);
                if (strcmp($resultPOS[$i][0], "for") == 0) {
                    // TODO: make this case insensitive?
                    $isAfterPreposition = false;
                }
            }
        }

        if ($streakContainsLocation || $streakContainsNoun) {
            // Register the current word, then every longer phrase ending at this
            // word that stays inside the streak.
            $phrase = $resultPOS[$i][0];
            $subStreakContainsNoun = $isNoun;
            $subStreakContainsLocation = $isLocation;
            if ($isNoun || $isLocation) {
                $newTerm = $termArray->addTerm($phrase, $i);
                $newTerm->isNoun = $isNoun;
                $newTerm->isLocation = $isLocation;
                $newTerm->isAfterPreposition = $isAfterPreposition;
            }
            for ($j = 1; $j < $currentStreak; $j++) {
                $phrase = $resultPOS[$i - $j][0] . ' ' . $phrase;
                $subStreakContainsNoun = ($subStreakContainsNoun || strncmp($resultPOS[$i - $j][1], "NN", 2) == 0);
                $subStreakContainsLocation = ($subStreakContainsLocation || strcmp($resultNER[$i - $j][1], "LOCATION") == 0);
                if ($subStreakContainsLocation || $subStreakContainsNoun) {
                    $newTerm = $termArray->addTerm($phrase, $i - $j);
                    $newTerm->isNoun = $subStreakContainsNoun;
                    $newTerm->isLocation = $subStreakContainsLocation;
                    $newTerm->isAfterPreposition = $isAfterPreposition;
                }
            }
        }
    }
    $this->times["Loop through text to find locations"] = microtime(true) - $startTime;

    // Now we remove some terms from the array:
    $startTime = microtime(true);
    if ($termArray->terms) {
        if ($termArray->countLocations() > 0) {
            if (printOutput()) {
                echo " This text contains words tagged as locations, so we will only consider those words. ";
            }
            $termArray->removeNouns(false);
            $this->termTypeUsed = "Locations";
        } else {
            if (printOutput()) {
                echo " This text does not contain words tagged as locations, so we must only use nouns. ";
            }
            if ($termArray->countNounsAfterPrepositions() > 0) {
                if (printOutput()) {
                    echo "Some nouns occured after prepositions, so we will only use those. \n";
                }
                $termArray->removeNouns(true);
                $this->termTypeUsed = "NounsAfterPrep";
            } else {
                $this->termTypeUsed = "Nouns";
            }
        }
    } else {
        echo "Warning: no nouns or locations found in text.";
        $this->termTypeUsed = "None";
    }
    $this->times["Filter terms that are found"] = microtime(true) - $startTime;

    // the rest of this code deals with postal codes
    $startTime = microtime(true);
    $CanadaPostCodes = [];
    $USZipCodes = [];
    $DutchPostCodes = [];
    preg_match_all('/\\b[a-zA-Z][0-9][a-zA-Z][\\s]?[0-9][a-zA-Z][0-9]\\b/', $text, $CanadaPostCodes);
    preg_match_all('/\\b[0-9]{5}([\\s\\-][0-9]{4})?\\b/', $text, $USZipCodes);
    preg_match_all('/\\b[0-9]{4}[\\s]?[a-zA-Z]{2}\\b/', $text, $DutchPostCodes);
    $this->times["Find postal codes in the text"] = microtime(true) - $startTime;

    $startTime = microtime(true);
    // Flags every matched postcode of one country on the term array. The three
    // countries previously used copy-pasted loops that differed only in the code.
    $tagPostcodes = function (array $postcodes, $country) use ($termArray, $words) {
        foreach ($postcodes as $postcode) {
            if (isset($termArray->terms[$postcode])) {
                // if the postcode already got in another way, we don't add it again
                foreach ($termArray->terms[$postcode] as $term) {
                    $term->isPostcode = true;
                    $term->postcodeCountry = $country;
                }
                continue;
            }

            $postcodeWords = explode(" ", $postcode); // all post codes have 1 or 2 words
            // Strict matching: a loose comparison treats numeric strings such as
            // "02134" and "2134" as the same word.
            $firstWordPositions = array_keys($words, $postcodeWords[0], true);
            if (count($postcodeWords) == 1) {
                $positions = $firstWordPositions;
            } else {
                $positions = [];
                foreach ($firstWordPositions as $firstWordPosition) {
                    $secondWordPosition = $firstWordPosition + 1;
                    // isset() guards the case where the first word is the last word of the text.
                    if (isset($words[$secondWordPosition]) && $words[$secondWordPosition] === $postcodeWords[1]) {
                        // Every occurrence is kept; the original appended to a misspelled
                        // variable, so only the first occurrence was ever recorded.
                        $positions[] = $firstWordPosition;
                    }
                }
                if (!$positions) {
                    // Placeholder position carried over from the original code for a
                    // two-word postcode that could not be located in $words.
                    $positions = [-10];
                }
            }

            foreach ($positions as $postcodePosition) {
                $newTerm = $termArray->addTerm($postcode, $postcodePosition);
                $newTerm->isPostcode = true;
                $newTerm->postcodeCountry = $country;
            }
        }
    };
    $tagPostcodes($CanadaPostCodes[0], "ca");
    $tagPostcodes($USZipCodes[0], "us");
    $tagPostcodes($DutchPostCodes[0], "nl");
    $this->times["Update metadata for post codes"] = microtime(true) - $startTime;

    return $termArray;
}