/**
 * For every line of the text, lists the words that pair with the line's
 * first word under the requested Todai rule.
 *
 * Only the first word of a line is used as the reference; each later word
 * is tested against it with one of the class's own checkers:
 *   "mOVY"   -> CheckMonai()
 *   "_etukY" -> CheckEtukai()
 *   "_iyYpu" -> CheckIyaipu()
 * Any other $TodaiType selects no checker, so no word is reported as a match.
 *
 * @param string $ProsodyText Text to analyse; it is passed through
 *                            RemovePunctuation() and split on PHP_EOL.
 * @param string $TodaiType   One of the rule identifiers listed above.
 *
 * @return array One entry per line. Each entry is a single-element array
 *               wrapping a list of one-pair arrays (word position => word):
 *               the first pair is always array(0 => first word), followed
 *               by one pair for every later word accepted by the checker.
 */
public function GetTodai($ProsodyText, $TodaiType) {
    // Strip punctuation / normalise the text, then split it into lines.
    $ProsodyText = RemovePunctuation($ProsodyText);
    $Lines = explode(PHP_EOL, trim($ProsodyText));

    $TodaiLineIndex = array();

    foreach ($Lines as $Line) {
        // NOTE(review): the line is not trimmed here, so a leading space
        // makes $Words[0] an empty string - confirm that is intended.
        $Words = explode(" ", $Line);
        $WordTotal = count($Words);

        // The reference word is always recorded at position 0.
        $TodaiIndex = array(array(0 => $Words[0]));

        for ($NewIndex = 1; $NewIndex < $WordTotal; $NewIndex++) {
            // Start from "no match" so an unrecognised $TodaiType never
            // reads an undefined variable.
            $TodaiCheck = false;

            if ($TodaiType == "mOVY") {
                $TodaiCheck = $this->CheckMonai($Words[0], $Words[$NewIndex]);
            }
            if ($TodaiType == "_etukY") {
                $TodaiCheck = $this->CheckEtukai($Words[0], $Words[$NewIndex]);
            }
            if ($TodaiType == "_iyYpu") {
                $TodaiCheck = $this->CheckIyaipu($Words[0], $Words[$NewIndex]);
            }

            if ($TodaiCheck) {
                $TodaiIndex[] = array($NewIndex => $Words[$NewIndex]);
            }
        }

        // Callers receive each line's result wrapped in one extra array level.
        $TodaiLineIndex[] = array($TodaiIndex);
    }

    return $TodaiLineIndex;
}
/**
 * Splits a text into lines, words and syllables and labels every syllable
 * and word with its prosodic class.
 *
 * Side effect: stores the number of lines in $this->TotalLines.
 *
 * @param string $ProsodyText Text to analyse. Parenthesised parts are
 *                            removed, the rest is passed through
 *                            RemovePunctuation() and split on PHP_EOL.
 *
 * @return array array("pA" => lines) where
 *               lines  = "aTi-<n>"  => words of line n (n starts at 1),
 *               words  = "cI_r-<n>" => syllables of word n, plus
 *                        "smeta"    => number of words stored for the line,
 *               syllables = "acY-<n>" => array(class label => matched text),
 *                        plus "meta" => $this->WordType entry for the
 *                        concatenated class labels (null when there is none).
 */
public function GetTextSyllablePattern($ProsodyText) {
    // Drop parenthesised text. The match is greedy: on a line holding
    // several groups everything from the first "(" to the last ")" goes.
    $ProsodyText = preg_replace('/\(.*\)/', "", $ProsodyText);
    // Strip punctuation / normalise the text, then split it into lines.
    $ProsodyText = RemovePunctuation($ProsodyText);
    $Lines = explode(PHP_EOL, trim($ProsodyText));
    // Remove one trailing whitespace character from every line.
    $Lines = preg_replace('/\s$/', "", $Lines);

    $LineList = array();
    $LineCount = 0;

    foreach ($Lines as $Line) {
        $WordList = array();
        $WordCount = 0;

        foreach (explode(" ", trim($Line)) as $Word) {
            $WordSyllable = $this->ScanWordSyllables($Word);

            // Words without any syllable (e.g. the empty strings produced
            // by repeated spaces) are skipped entirely. Previously they
            // were still stored, overwriting the preceding word's entry.
            if (empty($WordSyllable)) {
                continue;
            }

            $Syllable = array();
            $SyllableCount = 1;
            $WordPattern = "";
            foreach ($WordSyllable as $Entry) {
                $Syllable["acY-" . $SyllableCount++] = $Entry;
                foreach ($Entry as $Class => $ClassWord) {
                    $WordPattern = $WordPattern . $Class;
                }
            }

            // Unknown class sequences yield null instead of an
            // undefined-index notice.
            $Syllable["meta"] = isset($this->WordType[$WordPattern])
                ? $this->WordType[$WordPattern]
                : null;

            $WordList["cI_r-" . ++$WordCount] = $Syllable;
        }

        $WordList["smeta"] = $WordCount;
        $LineList["aTi-" . ++$LineCount] = $WordList;
    }

    $this->TotalLines = $LineCount;

    return array("pA" => $LineList);
}

/**
 * Breaks one word into classified syllables.
 *
 * The word is scanned by a fixed sequence of patterns, most specific first.
 * Everything a pattern matches is blanked out with "^" so that later, more
 * general patterns cannot claim the same characters again.
 *
 * @param string $Word Single word of the (already cleaned) text.
 *
 * @return array Byte offset within the word => array(class label => matched
 *               text), sorted by offset. Empty when nothing matched.
 */
private function ScanWordSyllables($Word) {
    // A "B" / "Q" directly after the word's first character is rewritten
    // to "W" / "Y" before classification.
    $Word = preg_replace('/(\b.)B/', '$1W', $Word);
    $Word = preg_replace('/(\b.)Q/', '$1Y', $Word);

    // Order matters: each pass only sees what earlier passes left unmasked.
    $Passes = array(
        // Two-unit syllable followed by a trailing "<consonant>u".
        array('nirYpu', '/([kGcJTNtnpmyrlvZLRVjSsh]?_?[aiueoBQ])([kGcJTNtnpmyrlvZLRVjSsh][aAiIuUeEoOYWBQ])(_[KkGcJTNtnpmyrlvZLRVjSsh])*([kGcJTNtnpmyrlvZLRVjSsh]u)/'),
        // Single unit on one of [AIUEOQYBW] followed by "<consonant>u".
        array('nE_rpu', '/[kGcJTNtnpmyrlvZLRVjSsh]?_?[AIUEOQYBW](_[KkGcJTNtnpmyrlvZLRVjSsh])*([kGcJTNtnpmyrlvZLRVjSsh]u)/'),
        // Single unit on any vowel letter, but with at least one
        // "_"-prefixed consonant before the "<consonant>u" tail.
        array('nE_rpu', '/[kGcJTNtnpmyrlvZLRVjSsh]?_?[aAiIuUeEoOQYBW](_[KkGcJTNtnpmyrlvZLRVjSsh])+([kGcJTNtnpmyrlvZLRVjSsh]u)/'),
        // Two-unit syllable without the "<consonant>u" tail.
        array('nirY', '/([kGcJTNtnpmyrlvZLRVjSsh]?_?[aiueoBQ])([kGcJTNtnpmyrlvZLRVjSsh][aAiIuUeEoOYWBQ])(_[KkGcJTNtnpmyrlvZLRVjSsh])*/'),
        // Whatever single units remain.
        array('nE_r', '/[kGcJTNtnpmyrlvZLRVjSsh]?_?[aAiIuUeEoOQYBW](_[KkGcJTNtnpmyrlvZLRVjSsh])*/'),
    );

    $WordSyllable = array();

    foreach ($Passes as $Pass) {
        $Class = $Pass[0];
        $Pattern = $Pass[1];

        preg_match_all($Pattern, $Word, $Found, PREG_OFFSET_CAPTURE);

        foreach ($Found[0] as $Hit) {
            $Text = $Hit[0];
            $Offset = $Hit[1];

            $WordSyllable[$Offset] = array($Class => $Text);

            // Mask the match in place, keeping every other offset stable.
            $Word = substr_replace(
                $Word,
                str_repeat("^", strlen($Text)),
                $Offset,
                strlen($Text)
            );
        }
    }

    // Restore left-to-right order; the passes found syllables out of order.
    ksort($WordSyllable);

    return $WordSyllable;
}