<th>
								Uthmani
							</th>
							<th>
								Simple
							</th>
							</tr>
							
							<?php 
$uthmaniCounter = 0;
$simpleCounter = 0;
$qaOntologyConceptsIterator = getAPCIterator("AR\\/OTHERS\\/UTHMANI_TO_SIMPLE_WORD_MAP\\/.*");
foreach ($qaOntologyConceptsIterator as $conceptsCursor) {
    $mapTermKey = getEntryKeyFromAPCKey($conceptsCursor['key']);
    $mapTermVal = $conceptsCursor['value'];
    if (isSimpleQuranWord($mapTermKey)) {
        $simpleCounter++;
        //echoN("##".$mapTermKey);
        continue;
    }
    $uthmaniCounter++;
    ?>
								<tr>
									<td><?php 
    echo $mapTermKey;
    ?>
</td>
									
									<td><?php 
    echo $mapTermVal;
    ?>
                    $noOntologyExtentionConstraint = true;
                }
            }
        }
    }
}
//preprint_r($columnSearchKeyValParams);exit;
//echoN("IS QUESTION:$isQuestion");
//echoN("noOntologyExtentionConstraint:$noOntologyExtentionConstraint");
//echoN("noDerivationsConstraint:$noDerivationsConstraint");
/// CLEANING
$query = cleanAndTrim($query);
//$query = removeTashkeel($query);
//  remove tashkeel - convert from uthmani to simple
// didn't use remove tashkeel since it leaves "hamzet el wasl" which is not in the simple text
if (!isSimpleQuranWord($query)) {
    $query = convertUthamniQueryToSimple($query);
}
// CASE HANDLING
if ($lang == "EN") {
    $query = strtolower($query);
    $query = removeSpecialCharactersFromMidQuery($query);
} else {
    $query = removeNonArabicAndSpaceChars($query);
}
$originalQueryWordsArr = preg_split("/ /", $query);
//for faster access
$originalQueryWordsArrSwaped = swapAssocArrayKeyValues($originalQueryWordsArr);
//echoN(memory_get_peak_usage());
// CHECK IF TRANSLITERATION
if ($lang == "EN" && !$isConceptSearch && !$isPhraseSearch && !$isQuestion) {
Example #3
0
/**
 * Collects frequency, morphology and verse information for a single Arabic word.
 *
 * The word may be passed in simple or Uthmani script; the other form is looked
 * up with getItemFromUthmaniToSimpleMappingTable(). Every inverted-index
 * occurrence of the simple form (except PRONOUN_ANTECEDENT and ROOT entries)
 * is then resolved to its QAC master-table segments, from which PoS tags,
 * lemmas, the root and the containing verses are gathered.
 *
 * @param string $word         Word to look up; surrounding whitespace is trimmed.
 * @param mixed  $MODEL_CORE   Not read by this function (models are fetched through
 *                             getModelEntryFromMemory()); kept for caller compatibility.
 * @param mixed  $MODEL_SEARCH Not read by this function; kept for caller compatibility.
 * @param mixed  $MODEL_QAC    Not read by this function; kept for caller compatibility.
 * @param bool   $fast         When truthy, stop after the first occurrence that
 *                             contributed at least one verse.
 * @param bool   $exactWord    When truthy, only occurrences whose word in the verse
 *                             text is identical (===) to the simple word are collected.
 *
 * @return array|null Null when the word has no simple form or is not present in the
 *                    inverted index. Otherwise an array with the keys WORD_SIMPLE,
 *                    WORD_UTHMANI, TF, TFIDF, BUCKWALTER, ROOT, LEM (lemma => 1),
 *                    POS (tag => 1), VERSES (verse location => text),
 *                    VERSES_POS_TAGS (verse location => space separated tags) and
 *                    FEATURES.
 */
function getWordInfo($word, $MODEL_CORE, $MODEL_SEARCH, $MODEL_QAC, $fast = FALSE, $exactWord = FALSE)
{
    $word = trim($word);
    if (isSimpleQuranWord($word)) {
        $wordUthmani = getItemFromUthmaniToSimpleMappingTable($word);
        $wordSimple = $word;
    } else {
        $wordUthmani = $word;
        // tashkeel of the last char is significant, e.g. lemmas will probably
        // not be found in the map because of that
        $wordSimple = getItemFromUthmaniToSimpleMappingTable($wordUthmani);
    }

    // Bail out before touching any table that is indexed by the simple word.
    if (empty($wordSimple) || !modelEntryExistsInMemory("AR", "MODEL_SEARCH", "INVERTED_INDEX", $wordSimple)) {
        return null;
    }

    // The word may be missing from the TF-IDF table; fall back to the values the
    // previous implementation produced in that case (null / 0.0) without
    // reading undefined indexes.
    $WORDS_FREQUENCY = getModelEntryFromMemory("AR", "MODEL_CORE", "WORDS_FREQUENCY", "");
    $freqArr = array();
    if (isset($WORDS_FREQUENCY['WORDS_TFIDF'][$wordSimple])) {
        $freqArr = $WORDS_FREQUENCY['WORDS_TFIDF'][$wordSimple];
    }

    $wordInfoArr = array();
    $wordInfoArr['WORD_SIMPLE'] = $wordSimple;
    $wordInfoArr['WORD_UTHMANI'] = $wordUthmani;
    $wordInfoArr['TF'] = isset($freqArr['TF']) ? $freqArr['TF'] : null;
    $wordInfoArr['TFIDF'] = round(isset($freqArr['TFIDF']) ? $freqArr['TFIDF'] : 0, 2);

    $buckwalterTransliteration = "";
    $wordRoot = "";
    $posTagsArr = array();
    $lemmasArr = array();
    $featuresArr = array();
    $versesArr = array();
    $versesTagsArr = array();

    $invertedIndexEntry = getModelEntryFromMemory("AR", "MODEL_SEARCH", "INVERTED_INDEX", $wordSimple);
    $QURAN_TEXT = getModelEntryFromMemory("AR", "MODEL_CORE", "QURAN_TEXT", "");
    $TOTALS = getModelEntryFromMemory("AR", "MODEL_CORE", "TOTALS", "");
    $PAUSEMARKS = $TOTALS['PAUSEMARKS'];

    foreach ($invertedIndexEntry as $documentArrInIndex) {
        $wordType = $documentArrInIndex['WORD_TYPE'];
        // ignore root sources and pronouns, only the normal corresponding word is needed
        if ($wordType == "PRONOUN_ANTECEDENT" || $wordType == "ROOT") {
            continue;
        }

        // SURA and AYA are incremented by one to build the QAC location string
        $qacLocation = getQACLocationStr(
            $documentArrInIndex['SURA'] + 1,
            $documentArrInIndex['AYA'] + 1,
            $documentArrInIndex['INDEX_IN_AYA_UTHMANI']
        );
        $qacMasterTableEntryArr = getModelEntryFromMemory("AR", "MODEL_QAC", "QAC_MASTERTABLE", $qacLocation);
        if (empty($qacMasterTableEntryArr)) {
            // no segments stored for this location, nothing to collect
            continue;
        }

        // Everything below depends on the location only, so it is computed
        // once per occurrence instead of once per segment.
        $verseText = getVerseByQACLocation($QURAN_TEXT, $qacLocation);
        $qacVerseLocation = substr($qacLocation, 0, strrpos($qacLocation, ":"));
        $skipOccurrence = false;
        if ($exactWord) {
            $wordId = getWordIndexFromQACLocation($qacLocation);
            $wordFromVerseAtLocation = getWordFromVerseByIndex($PAUSEMARKS, $verseText, $wordId);
            $skipOccurrence = ($wordSimple !== $wordFromVerseAtLocation);
        }

        // search QAC for roots and lemmas of this word
        foreach ($qacMasterTableEntryArr as $segmentDataArr) {
            // Backward compatible: transliteration and features always reflect the
            // last segment visited, including segments of skipped occurrences.
            $buckwalterTransliteration = $segmentDataArr['FORM_EN'];
            $featuresArr = array_merge($segmentDataArr['FEATURES']);

            if ($skipOccurrence) {
                continue;
            }

            if (isset($segmentDataArr['FEATURES']['ROOT']) && $segmentDataArr['FEATURES']['ROOT'] != -1) {
                $wordRoot = $segmentDataArr['FEATURES']['ROOT'];
            }

            $tag = $segmentDataArr['TAG'];
            $posTagsArr[$tag] = 1;

            // Only record a lemma this very segment carries; segments without
            // a LEM feature must not re-use a lemma from an earlier segment.
            if (isset($segmentDataArr['FEATURES']['LEM'])) {
                $lemmasArr[$segmentDataArr['FEATURES']['LEM']] = 1;
            }

            if (!isset($versesArr[$qacVerseLocation])) {
                $versesArr[$qacVerseLocation] = $verseText;
            }
            if (!isset($versesTagsArr[$qacVerseLocation])) {
                $versesTagsArr[$qacVerseLocation] = "";
            }
            $versesTagsArr[$qacVerseLocation] = $versesTagsArr[$qacVerseLocation] . " " . $tag;
        }

        // the whole inverted index list is only needed for verses;
        // in fast mode stop as soon as one occurrence has been collected
        if ($fast && !empty($versesArr)) {
            break;
        }
    }

    $wordInfoArr['BUCKWALTER'] = $buckwalterTransliteration;
    $wordInfoArr['ROOT'] = $wordRoot;
    $wordInfoArr['LEM'] = $lemmasArr;
    $wordInfoArr['POS'] = $posTagsArr;
    $wordInfoArr['VERSES'] = $versesArr;
    $wordInfoArr['VERSES_POS_TAGS'] = $versesTagsArr;
    $wordInfoArr['FEATURES'] = $featuresArr;
    return $wordInfoArr;
}
/**
 * Returns the simple-script form of a relation term.
 *
 * Terms already in simple script are returned untouched. Otherwise the
 * Uthmani-to-simple mapping table is consulted first; the shallow conversion is
 * only the last resort because it spoils words and leads to duplicate concepts.
 *
 * @param string $term
 * @return string
 */
function addRelationConvertTermToSimple($term)
{
    if (isSimpleQuranWord($term)) {
        return $term;
    }
    $simpleTerm = getItemFromUthmaniToSimpleMappingTable($term);
    if (empty($simpleTerm)) {
        $simpleTerm = shallowUthmaniToSimpleConversion($term);
    }
    return $simpleTerm;
}

/**
 * Resolves the English translation of one verb (or verb part).
 *
 * Looks the word up in $WORDS_TRANSLATIONS_AR_EN first. When that yields nothing
 * (e.g. one of the segments was trimmed) the custom translation table is used,
 * and when the word is not there either it is machine translated and the
 * result is added to the custom table.
 *
 * @param mixed  $tableKey     Key used for the $WORDS_TRANSLATIONS_AR_EN lookup.
 * @param string $fallbackWord Word used for the custom table / translation API.
 * @return mixed The translation text.
 */
function addRelationTranslateVerbWord($tableKey, $fallbackWord)
{
    global $WORDS_TRANSLATIONS_AR_EN;

    // isset() avoids an undefined-index notice; null is what the helper
    // received for unknown words before.
    $rawTranslation = isset($WORDS_TRANSLATIONS_AR_EN[$tableKey]) ? $WORDS_TRANSLATIONS_AR_EN[$tableKey] : null;
    $translation = cleanEnglishTranslation($rawTranslation);
    if (!empty($translation)) {
        return $translation;
    }

    if (!isFoundInTranslationTable($fallbackWord, "VERB")) {
        // translate through the external API and remember the result
        $translation = translateText($fallbackWord, "ar", "en");
        addTranslationEntry($translation, "VERB", $fallbackWord, "AR");
        return $translation;
    }

    $customTranslationEntryArr = getTranlationEntryByEntryKeyword($fallbackWord);
    return $customTranslationEntryArr['EN_TEXT'];
}

/**
 * Normalises a subject/verb/object triple and registers it through addNewRelation().
 *
 * Subject and object are converted to simple script. When no English translation
 * of the verb is supplied, one is derived word by word. The simple-script form of
 * the verb is built from the mapping table, falling back to
 * removeTashkeel(shallowUthmaniToSimpleConversion()) of the original verb.
 *
 * @param array  $relationsArr        Relations collection, passed by reference on
 *                                    to addNewRelation().
 * @param mixed  $type                Relation type, passed through unchanged.
 * @param string $subject             Relation subject (simple or Uthmani script).
 * @param string $verb                Relation verb, a single word or a
 *                                    space separated phrase.
 * @param string $object              Relation object (simple or Uthmani script).
 * @param mixed  $joinedPattern       Passed through unchanged.
 * @param string $verbEngTranslation  Optional ready-made English translation of the
 *                                    verb; derived here when empty.
 * @param string $fullVerbQuranWord   Full Quran word of a single-word verb, used
 *                                    as the translation lookup key.
 *
 * @return mixed False when subject or object is empty, otherwise whatever
 *               addNewRelation() returns.
 */
function addRelation(&$relationsArr, $type, $subject, $verb, $object, $joinedPattern, $verbEngTranslation = "", $fullVerbQuranWord = "")
{
    if (empty($subject) || empty($object)) {
        return false;
    }

    $subjectSimple = addRelationConvertTermToSimple($subject);
    $objectSimple = addRelationConvertTermToSimple($object);

    $verbUthmani = $verb;
    $verbSimple = "";

    ///////// VERB TRANSLATION
    if (empty($verbEngTranslation)) {
        $verbEngTranslation = "";
        if (!isMultiWordStr($verb)) {
            // SINGLE WORD VERB
            $verb = trim($verb);
            $translatableVerb = $fullVerbQuranWord;
            if (isSimpleQuranWord($verb)) {
                $translatableVerb = getItemFromUthmaniToSimpleMappingTable($fullVerbQuranWord);
            } else {
                $verbSimple = getItemFromUthmaniToSimpleMappingTable($verb);
            }
            $verbEngTranslation = addRelationTranslateVerbWord($translatableVerb, $translatableVerb);
        } else {
            // VERB PHRASE: handle every word on its own
            $convertedAnyPart = false;
            foreach (preg_split("/ /", $verb) as $verbPart) {
                if (isSimpleQuranWord($verbPart)) {
                    // get the mapped word to be able to translate
                    $translatableVerb = getItemFromUthmaniToSimpleMappingTable($verbPart);
                    // Keep words that are already simple, otherwise a phrase
                    // mixing both scripts loses them in the relation meta.
                    if ($verbPart !== "") {
                        $verbSimple = $verbSimple . " " . $verbPart;
                    }
                } else {
                    // simple form to be added to the relation meta
                    $simplePart = getItemFromUthmaniToSimpleMappingTable($verbPart);
                    // not in the mapping table: use the shallow conversion
                    if (empty($simplePart)) {
                        $simplePart = shallowUthmaniToSimpleConversion($verbPart);
                    }
                    $verbSimple = $verbSimple . " " . $simplePart;
                    $convertedAnyPart = true;
                    // the simple form is also the translation lookup key
                    $translatableVerb = $simplePart;
                }
                // translation accumulation
                $verbEngTranslation = $verbEngTranslation . " " . addRelationTranslateVerbWord($translatableVerb, $verbPart);
            }
            if (!$convertedAnyPart) {
                // Nothing needed conversion: leave the simple verb empty so the
                // whole-phrase fallback below applies, exactly as before.
                $verbSimple = "";
            }
        }
    }

    $verbSimple = trim($verbSimple);
    if (empty($verbSimple)) {
        $verbSimple = removeTashkeel(shallowUthmaniToSimpleConversion($verbUthmani));
    }
    return addNewRelation($relationsArr, $type, $subjectSimple, $verbSimple, $objectSimple, $joinedPattern, $verbEngTranslation, $verbUthmani);
}
if ($lang == "EN") {
    showTechnicalError("Only Arabic is supported here, you chose English !");
}
$lang = "AR";
loadModels("core,search,qac", $lang);
$word = trim($_GET['word']);
//preprint_r($poTaggedSubsentences);
//echoN("SubSentences Count:".addCommasToNumber(count($poTaggedSubsentences)));
$topPoSAggregation = array();
$ssPoSAggregation = array();
$ssPoSAggregationCorrespondingSent = array();
//echoN("Word:$word");
$targetType = "POS";
if (isArabicString($word)) {
    $targetType = "WORD";
    if (isSimpleQuranWord($word)) {
        $poTaggedSubsentences = getPoSTaggedSubsentences("SIMPLE");
    } else {
        $poTaggedSubsentences = getPoSTaggedSubsentences();
    }
} else {
    $poTaggedSubsentences = getPoSTaggedSubsentences();
}
$targetPOSorWord = trim($word);
if ($targetType == "POS") {
    if (!modelEntryExistsInMemory("AR", "MODEL_QAC", "QAC_POS", $targetPOSorWord)) {
        showTechnicalError("Not a valid PoS tag !");
        exit;
    }
} else {
    if (empty($targetPOSorWord)) {
/**
 * Extends a set of query words with their QAC derivations.
 *
 * Pass 1: for every query word, each NORMAL_WORD occurrence in the inverted index
 * is resolved to its QAC segments; the ROOT and LEM features of segments that
 * carry a STEM feature are added as new keys (value 1). A hard-coded custom root
 * table is consulted as well.
 *
 * Pass 2: for every key that is not a simple Quran word, the word found in the
 * verse text at each of its inverted-index occurrences (pronoun antecedents
 * excluded) is added as a new key whose value is the QAC location.
 *
 * Existing keys are never overwritten, except that a custom-root hit always sets
 * its root key to 1.
 *
 * @param array $extendedQueryWordsArr Query words as keys (values are not read).
 * @return array The input array plus the derived words.
 */
function extendQueryByExtractingQACDerviations($extendedQueryWordsArr)
{
    ////////// CUSTOM ROOT TABLE ///////////
    // TODO (carried over): hard-coded mapping, built once instead of per word
    $CUSTOM_ROOTS_TABLE = array();
    $CUSTOM_ROOTS_TABLE['الزواج'] = "زوج";

    /** GET ROOT/LEMMA FOR EACH QUERY WORD **/
    foreach ($extendedQueryWordsArr as $word => $index) {
        $invertedIndexEntryArr1 = getModelEntryFromMemory("AR", "MODEL_SEARCH", "INVERTED_INDEX", $word);
        // a query word is not necessarily part of the index
        if (!empty($invertedIndexEntryArr1)) {
            foreach ($invertedIndexEntryArr1 as $documentArrInIndex) {
                // only normal words contribute derivations
                if ($documentArrInIndex['WORD_TYPE'] != "NORMAL_WORD") {
                    continue;
                }

                $qacLocation = getQACLocationStr(
                    $documentArrInIndex['SURA'] + 1,
                    $documentArrInIndex['AYA'] + 1,
                    $documentArrInIndex['INDEX_IN_AYA_UTHMANI']
                );
                $qacMasterTableEntryArr2 = getModelEntryFromMemory("AR", "MODEL_QAC", "QAC_MASTERTABLE", $qacLocation);
                if (empty($qacMasterTableEntryArr2)) {
                    continue;
                }

                // search QAC for roots and lemmas of this word
                foreach ($qacMasterTableEntryArr2 as $segmentDataArr) {
                    if (!isset($segmentDataArr['FEATURES']['STEM'])) {
                        continue;
                    }
                    $segmentFeatures = $segmentDataArr['FEATURES'];
                    $rootOfQueryWord = isset($segmentFeatures['ROOT']) ? $segmentFeatures['ROOT'] : null;
                    $lemmaOfQueryWord = isset($segmentFeatures['LEM']) ? $segmentFeatures['LEM'] : null;

                    // -1 marks "no root" (getWordInfo() skips it the same way)
                    if (!empty($rootOfQueryWord) && $rootOfQueryWord != -1 && !isset($extendedQueryWordsArr[$rootOfQueryWord])) {
                        $extendedQueryWordsArr[$rootOfQueryWord] = 1;
                    }
                    // A missing lemma must not end up as an empty query word.
                    if (!empty($lemmaOfQueryWord) && !isset($extendedQueryWordsArr[$lemmaOfQueryWord])) {
                        $extendedQueryWordsArr[$lemmaOfQueryWord] = 1;
                    }
                }
            }
        }

        if (isset($CUSTOM_ROOTS_TABLE[$word])) {
            $extendedQueryWordsArr[$CUSTOM_ROOTS_TABLE[$word]] = 1;
        }
    }

    $QURAN_TEXT = getModelEntryFromMemory("AR", "MODEL_CORE", "QURAN_TEXT", "");
    $TOTALS = getModelEntryFromMemory("AR", "MODEL_CORE", "TOTALS", "");
    $PAUSEMARKS = $TOTALS['PAUSEMARKS'];

    /** GET EMLA2Y (SIMPLE) WORDS CORRESPONDING TO ANY QAC SEGMENT CONTAINING THE ROOTS/LEMMAS
     *  IN THE EXTENDED QUERY WORDS FROM THE INVERTED INDEX AND ADD THEM TO THE EXTENDED QUERY WORDS
     *  TODO: recheck to remove this whole loop
     * **/
    foreach ($extendedQueryWordsArr as $word => $dummy) {
        // only Uthmani words should be handled
        if (isSimpleQuranWord($word)) {
            continue;
        }
        $invertedIndexEntry = getModelEntryFromMemory("AR", "MODEL_SEARCH", "INVERTED_INDEX", $word);
        if (empty($invertedIndexEntry)) {
            continue;
        }
        foreach ($invertedIndexEntry as $documentArrInIndex) {
            // pronouns should not be added to the query because they can refer
            // to many things other than the original query; checked first so
            // no verse lookup is spent on them
            if ($documentArrInIndex['WORD_TYPE'] == "PRONOUN_ANTECEDENT") {
                continue;
            }

            $qacLocation = getQACLocationStr(
                $documentArrInIndex['SURA'] + 1,
                $documentArrInIndex['AYA'] + 1,
                $documentArrInIndex['INDEX_IN_AYA_UTHMANI']
            );
            $verseText = getVerseByQACLocation($QURAN_TEXT, $qacLocation);
            $wordFromVerse = getWordFromVerseByIndex($PAUSEMARKS, $verseText, $documentArrInIndex['INDEX_IN_AYA_EMLA2Y']);
            if (empty($wordFromVerse)) {
                continue;
            }
            if (!isset($extendedQueryWordsArr[$wordFromVerse])) {
                $extendedQueryWordsArr[$wordFromVerse] = $qacLocation;
            }
        }
    }
    return $extendedQueryWordsArr;
}