/**
 * Checks whether a word is unchanged by removeTashkeel().
 *
 * @param string $str word to test
 *
 * @return bool true when removeTashkeel($str) compares equal (loose ==) to
 *              $str, false otherwise
 */
function isSimpleQuranWord($str)
{
    $strippedWord = removeTashkeel($str);

    return $strippedWord == $str;
}
/**
 * Prints one HTML result box per entry of a search scoring table and returns
 * the plain (un-highlighted) verse texts.
 *
 * For each entry the verse text is looked up by its SURA/AYA indexes, the
 * matched words/phrase, pronouns and (for questions) collocation words are
 * wrapped in marker tags, and a block containing the verse, its counterpart
 * in the other language, and a few info links is echoed.
 *
 * @param array      $scoringTable                documentID => score entry; the keys read from each
 *                                                entry are SURA, AYA, WORD_TYPE,
 *                                                POSSIBLE_HIGHLIGHTABLE_WORDS and PRONOUNS
 * @param string     $lang                        "EN" or "AR" (compared against these literals)
 * @param string     $direction                   value echoed into the CSS "direction" property
 * @param string     $query                       phrase to highlight when $isPhraseSearch is true
 * @param bool       $isPhraseSearch              highlight $query as a whole instead of single words
 * @param bool       $isQuestion                  also mark $significantCollocationWords in the verse
 * @param string     $script                      "simple" or "uthmani" (see review note in the body)
 * @param array|null $significantCollocationWords word => frequency; only the words are used
 * @param bool       $isTransliterationSearch     also highlight and print the transliterated verse
 *
 * @return array verse texts in result order, captured before any highlighting
 */
function printResultVerses($scoringTable, $lang, $direction, $query, $isPhraseSearch, $isQuestion, $script, $significantCollocationWords = null, $isTransliterationSearch = false)
{
    // NOTE(review): "global $script" rebinds the name of the $script parameter to the
    // global variable, so the passed argument is presumably ignored - confirm which one
    // is intended before removing either. Left untouched to keep caller behaviour.
    global $script, $TRANSLITERATION_VERSES_MAP;

    $QURAN_TEXT = getModelEntryFromMemory($lang, "MODEL_CORE", "QURAN_TEXT", "");
    $QURAN_TEXT_UTH = getModelEntryFromMemory("AR_UTH", "MODEL_CORE", "QURAN_TEXT", "");
    $META_DATA = getModelEntryFromMemory($lang, "MODEL_CORE", "META_DATA", "");

    // Text shown in the "translation" box: Arabic for English results, English otherwise.
    if ($lang != "EN") {
        $QURAN_TEXT_OTHER_LANG = getModelEntryFromMemory("EN", "MODEL_CORE", "QURAN_TEXT", "");
    } elseif ($script == "simple") {
        $QURAN_TEXT_OTHER_LANG = getModelEntryFromMemory("AR", "MODEL_CORE", "QURAN_TEXT", "");
    } else {
        $QURAN_TEXT_OTHER_LANG = $QURAN_TEXT_UTH;
    }

    $searchResultsTextArr = array();

    if (empty($scoringTable)) {
        return $searchResultsTextArr;
    }

    // Counts down from (number of results - 1) to 0 in iteration order.
    // The original counted an undefined variable here instead of the table itself.
    $relevanceReverseOrderIndex = (is_array($scoringTable) || $scoringTable instanceof Countable)
        ? count($scoringTable)
        : 0;

    $showTransText = ($lang == "EN") ? "Show Arabic" : "Show Translation";

    foreach ($scoringTable as $documentID => $documentScoreArr) {
        $relevanceReverseOrderIndex--;

        $SURA = $documentScoreArr['SURA'];
        $AYA = $documentScoreArr['AYA'];

        // SURA/AYA are displayed with +1, i.e. they appear to be zero-based indexes.
        $verseNumberLabel = ($SURA + 1) . ":" . ($AYA + 1);

        $TEXT = $QURAN_TEXT[$SURA][$AYA];
        $TEXT_UTH = $QURAN_TEXT_UTH[$SURA][$AYA];
        $TEXT_TRANSLITERATED = cleanTransliteratedText($TRANSLITERATION_VERSES_MAP[$verseNumberLabel]);
        $TEXT_TRANSLATED = $QURAN_TEXT_OTHER_LANG[$SURA][$AYA];

        $WORD_TYPE = $documentScoreArr['WORD_TYPE'];
        $WORDS_IN_AYA = $documentScoreArr['POSSIBLE_HIGHLIGHTABLE_WORDS'];
        $PRONOUNS = $documentScoreArr['PRONOUNS'];

        // the returned list holds the verse before any marker tags are inserted
        $searchResultsTextArr[] = $TEXT;

        $SURA_NAME = $META_DATA['SURAS'][$SURA]['name_' . strtolower($lang)];
        $SURA_NAME_LATIN = $META_DATA['SURAS'][$SURA]['name_trans'];

        $MATCH_TYPE = "";
        if ($WORD_TYPE == "PRONOUN_ANTECEDENT") {
            $MATCH_TYPE = ($lang == "EN") ? "pronoun" : "ضمير";
        } elseif ($WORD_TYPE == "ROOT" || $WORD_TYPE == "LEM") {
            $MATCH_TYPE = "تصريف / إشتقاق";
        }

        // empty in case of only pronouns
        if (!empty($WORDS_IN_AYA)) {
            if ($isPhraseSearch) {
                // preg_quote keeps characters such as "/" or "(" in the query from
                // breaking the pattern (preg_replace would return null and blank the verse)
                if ((string) $query !== "") {
                    $phrasePattern = "/(" . preg_quote($query, "/") . ")/mui";
                    $TEXT = preg_replace($phrasePattern, "<marked>\\1</marked>", $TEXT);
                }
            } else {
                // mark all POSSIBLE_HIGHLIGHTABLE_WORDS
                $quotedWords = array();
                foreach (array_keys($WORDS_IN_AYA) as $highlightWord) {
                    $highlightWord = (string) $highlightWord;
                    // an empty alternative would match at every position
                    if ($highlightWord !== "") {
                        $quotedWords[] = preg_quote($highlightWord, "/");
                    }
                }

                if (!empty($quotedWords)) {
                    $wordsPattern = "/(" . join("|", $quotedWords) . ")/mui";
                    $TEXT = preg_replace($wordsPattern, "<marked>\\1</marked>", $TEXT);

                    if ($isTransliterationSearch) {
                        $TEXT_TRANSLITERATED = preg_replace($wordsPattern, "<marked>\\1</marked>", $TEXT_TRANSLITERATED);
                    }
                }
            }
        }

        // mark PRONOUNS - not restricted to PRONOUN_ANTECEDENT results since a verse
        // may carry both pronoun and normal word matches
        if (!empty($PRONOUNS)) {
            foreach ($PRONOUNS as $pronounText => $PRONOUN_INDEX_IN_AYA_EMLA2Y) {
                $pronounText = removeTashkeel($pronounText);
                $TEXT = markSpecificWordInText($TEXT, $PRONOUN_INDEX_IN_AYA_EMLA2Y - 1, $pronounText, "marked");
            }
        }

        if ($isQuestion && !empty($significantCollocationWords)) {
            foreach ($significantCollocationWords as $word => $freq) {
                $TEXT = markWordWithoutWordIndex($TEXT, $word, "marked_prospect_answer");
            }
        }

        // ":" is replaced so the document id can be used as an HTML element id
        $elementId = str_replace(":", "-", $documentID);
        ?>
        <div class='result-aya-container' order='<?php echo $SURA + 1; ?>' relevance='<?php echo $relevanceReverseOrderIndex; ?>'>
            <div class='result-aya' style="direction:<?php echo $direction; ?>" id="<?php echo $elementId; ?>">
                <?php
                if ($script == "uthmani" && $lang == "AR") {
                    echo $TEXT_UTH;
                } else {
                    echo $TEXT;
                    if ($isTransliterationSearch) {
                        echo "<hr class='transliteration-separator'/>";
                        echo "<div class='transliteration-verse-text-area'>{$TEXT_TRANSLITERATED}</div>";
                    }
                }
                ?>
                <div id="<?php echo $elementId; ?>-translation" class='result-translated-text' style="direction:<?php echo $lang == "AR" ? "ltr" : "rtl"; ?>">
                    <?php echo $TEXT_TRANSLATED; ?>

                </div>
            </div>
            <div class='result-aya-info'>
                <span class='result-sura-info' style="direction:<?php echo $direction; ?>">
                    <?php
                    echo $SURA_NAME;
                    if ($lang == "EN") {
                        echo " ({$SURA_NAME_LATIN})";
                    }
                    ?>
                    [<?php echo $verseNumberLabel; ?>]
                </span>
                <span class='result-aya-showtranslation'>
                    <a href="javascript:showTranslationFor('<?php echo $elementId; ?>')"><?php echo $showTransText; ?></a>
                </span>
                <span class='result-more-about-aya'>
                    <a target='_new' href='http://quran.com/<?php echo ($SURA + 1) . "/" . ($AYA + 1); ?>'> More </a>
                </span>
                <span class='result-match-type'>
                    <?php echo $MATCH_TYPE; ?>

                </span>
            </div>
        </div>
        <?php
    }

    return $searchResultsTextArr;
}
/**
 * Returns words from QAC by PoS tag - grouped by lemma.
 *
 * Every QAC segment listed under $POS is added to $finalTerms, keyed by its
 * lemma. A new lemma gets an entry from generateEmptyConceptMetadata() filled
 * with LEM, POS, SIMPLE_WORD, ROOT and WEIGHT; every occurrence increments
 * FREQ and registers the segment form under SEG and the tag under POSES.
 *
 * @param array  $finalTerms accumulator, modified in place (passed by reference)
 * @param string $POS        part-of-speech tag used as the QAC_POS lookup key
 *
 * @return array the updated $finalTerms
 */
function getWordsByPos(&$finalTerms, $POS)
{
    global $LEMMA_TO_SIMPLE_WORD_MAP;

    $qacPosEntryArr = getModelEntryFromMemory("AR", "MODEL_QAC", "QAC_POS", $POS);
    $WORDS_FREQUENCY = getModelEntryFromMemory("AR", "MODEL_CORE", "WORDS_FREQUENCY", "");

    if (empty($qacPosEntryArr)) {
        return $finalTerms;
    }

    // loop-invariant markers, built once instead of per segment
    $superscriptAlef = json_decode('"\\u0670"');
    $alefWasla = "ٱ"; //U+0671

    // Get all segments in QAC for that PoS
    foreach ($qacPosEntryArr as $location => $segmentId) {
        $qacMasterTableEntry = getModelEntryFromMemory("AR", "MODEL_QAC", "QAC_MASTERTABLE", $location);

        // segment ids are 1-based while the master table entry is indexed from 0
        $segment = $qacMasterTableEntry[$segmentId - 1];

        // get word, lemma and root
        $segmentWord = $segment['FORM_AR'];
        $segmentWordLema = $segment['FEATURES']['LEM'];
        $segmentWordRoot = isset($segment['FEATURES']['ROOT']) ? $segment['FEATURES']['ROOT'] : null;

        $lemmaFromMap = isset($LEMMA_TO_SIMPLE_WORD_MAP[$segmentWordLema])
            ? $LEMMA_TO_SIMPLE_WORD_MAP[$segmentWordLema]
            : null;

        // The uthmani-to-simple mapping table is always tried first. The lemma map is
        // only used as fallback for lemmas containing a superscript alef / alef wasla,
        // since it is not good for other words (ex: the lemma زيت is converted to زيتها,
        // which spoiled the ontology concept list results).
        $imla2yWord = getItemFromUthmaniToSimpleMappingTable($segmentWordLema);
        if (empty($imla2yWord)) {
            $hasSpecialAlef = mb_strpos($segmentWordLema, $superscriptAlef) !== false
                || mb_strpos($segmentWordLema, $alefWasla) !== false;

            $imla2yWord = $hasSpecialAlef
                ? $lemmaFromMap
                : shallowUthmaniToSimpleConversion($segmentWordLema);
        }

        // Weight lookup by the simple word first. The original read an undefined
        // $MODEL_CORE here, so this lookup never succeeded.
        $termWeightArr = isset($WORDS_FREQUENCY['WORDS_TFIDF'][$imla2yWord])
            ? $WORDS_FREQUENCY['WORDS_TFIDF'][$imla2yWord]
            : null;

        // in case the simple word was not found, try the lemma mapping table -
        // only for the weight, since the lemma table decreases matching quality
        if (empty($termWeightArr) && isset($WORDS_FREQUENCY['WORDS_TFIDF'][$lemmaFromMap])) {
            $termWeightArr = $WORDS_FREQUENCY['WORDS_TFIDF'][$lemmaFromMap];
        }

        $termWeight = isset($termWeightArr['TFIDF']) ? $termWeightArr['TFIDF'] : null;

        // terms are grouped by lemma
        $termWord = $segmentWordLema;

        if (!isset($finalTerms[$termWord])) {
            $finalTerms[$termWord] = generateEmptyConceptMetadata();
            $finalTerms[$termWord]['LEM'] = $segmentWordLema;
            $finalTerms[$termWord]['POS'] = $POS;
            $finalTerms[$termWord]['SIMPLE_WORD'] = $imla2yWord;
            $finalTerms[$termWord]['ROOT'] = $segmentWordRoot;
            $finalTerms[$termWord]['WEIGHT'] = $termWeight;
        }

        $finalTerms[$termWord]["FREQ"] = $finalTerms[$termWord]["FREQ"] + 1;

        if (!isset($finalTerms[$termWord]["SEG"][$segmentWord])) {
            $finalTerms[$termWord]["SEG"][$segmentWord] = $imla2yWord;
        }

        if (!isset($finalTerms[$termWord]["POSES"][$POS])) {
            $finalTerms[$termWord]["POSES"][$POS] = 1;
        }
    }

    return $finalTerms;
}
//echoN("SP:".$startOfRule); // GET ALL POS WHICH IS FOUND BEFORE THE PATTERN IN THE PATTERN STRING $prePatternStr = substr($ssPoSPattern, 0, $startOfRule); // SOME STATISTICS $numberOfWordsPrePattern = preg_match_all("/,/", $prePatternStr); $numberOfWordsInRule = preg_match_all("/,/", $rule) + 1; //echoN("# of words prepattern:".$numberOfWordsPrePattern); //echoN("# of words in rule:".$numberOfWordsInRule); $startArrayIndexOfPattern = $numberOfWordsPrePattern; $verseId = substr($location, 0, strlen($location) - 2); // QAC INDEX OF FIRST CORRSPONDING WORD IN THE PATTERN $qacStartWordIndexInVerse = $qacIndexes[$startArrayIndexOfPattern]; $qacBaseLocation = $verseId; //echoN($qacLocation); // IF THE SUBVERSE CONTAIN CONDITIONS OR VOCATIVES, IGNORE THE WHOLE SUBVERSE if (preg_match("/VOC|COND|INTG/", $prePatternStr) || mb_strpos(removeTashkeel($subSentenceStr), "قال") !== false) { continue; } //preprint_r($qacWordSegmentsArr); $prevPoS = $prevWord = null; if ($startArrayIndexOfPattern - 1 >= 0) { $prevPoS = $posArr[$startArrayIndexOfPattern - 1]; $prevWord = $wordsArr[$startArrayIndexOfPattern - 1]; } switch ($rule) { case "V PRON PRON": // RESOLVE PRONOUNS $qacWordLocation = $qacBaseLocation . ":" . $qacStartWordIndexInVerse; $pronounConceptArr = resolvePronouns($qacWordLocation); $subject = $pronounConceptArr[0]; $object = $pronounConceptArr[1];