PHP rsortBy示例

编程语言: PHP

方法/功能: rsortBy

hotexamples.com的示例: 4

PHP rsortBy - 已找到4个示例。这些是从开源项目中提取的最受好评的rsortBy现实PHP示例。您可以评价示例，以帮助我们提高示例质量。

示例#1

显示文件

文件： question.answering.lib.php 项目： AhmedAMohamed/qurananalysis

/**
 * Re-scores search results against a user question and returns the best matching verses.
 *
 * Every entry of $scoringTable gets its SCORE raised by:
 *  - the number of concepts found in both the question and the verse text,
 *  - the number of verse concepts that are sources of an "is a" relation
 *    targeting the question-type concept,
 *  - common derivations (substring matches) when $lang is "EN", or common
 *    word roots for any other language.
 * The three highest scoring verses are kept, and any of them with no measured
 * overlap with the question is dropped.
 *
 * @param string $query                  Question text.
 * @param array  $queryWordsArr          Not used by this function; kept for caller compatibility.
 * @param array  $taggedSignificantWords Map of word => part-of-speech tag.
 * @param array  $scoringTable           Map of document ID => array holding at least SURA, AYA and SCORE.
 * @param string $lang                   Language code; "EN" selects derivation matching.
 *
 * @return array array("ANSWER_CONCEPTS" => ..., "ANSWER_VERSES" => ...)
 */
function answerUserQuestion($query, $queryWordsArr, $taggedSignificantWords, $scoringTable, $lang)
{
    global $is_a_relation_name_ar;
    $QURAN_TEXT = getModelEntryFromMemory($lang, "MODEL_CORE", "QURAN_TEXT", "");
    // answering by relevance and similarity
    $conceptsFromTaxRelations = extendQueryWordsByConceptTaxRelations($taggedSignificantWords, $lang, true);
    $COMMON_CONCEPTS_FACTOR = 10;
    $COMMON_QUESTION_TYPE_CONCEPTS_FACTOR = 10;
    $COMMON_ROOTS_FACTOR = 10;
    $COMMON_DERIVATIONS_FACTOR = 10;
    $isEnglish = $lang == "EN";
    $scoredAnswerVersesArr = array();
    $questionType = containsQuestionWords($query, $lang);
    ////////// COMMON CONCEPTS IN QUESTION
    $conceptsInQuestionTextArr = getConceptsFoundInText($query, $lang);
    // the question side of the comparison does not change per verse
    $questionConceptKeys = array_keys($conceptsInQuestionTextArr);
    /////////// GET INSTANCE CONCEPTS FROM QUESTION TYPE CLASS
    $questionType = cleanAndTrim(strtolower($questionType));
    $conceptID = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS_EN_AR_NAME_MAP", $questionType);
    $relationsOfConceptAsTarget = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "GRAPH_INDEX_TARGETS", $conceptID);
    $questionTypeConceptsArr = array();
    // the lookup may yield nothing for an unknown question type; only iterate real arrays
    if (is_array($relationsOfConceptAsTarget)) {
        foreach ($relationsOfConceptAsTarget as $relArr) {
            // only sources linked through the "is a" relation count as instances
            if ($relArr["link_verb"] == $is_a_relation_name_ar) {
                $questionTypeConceptsArr[] = $relArr["source"];
            }
        }
    }
    /////////// ROOTS OF THE QUESTION NOUNS (non-English only, identical for every verse)
    $questionWordsRootsArr = array();
    if (!$isEnglish) {
        foreach ($taggedSignificantWords as $wordInQuestion => $pos) {
            if (mb_strlen($wordInQuestion) <= 2) {
                continue;
            }
            if ($pos == "NN" || $pos == "NNS") {
                $root = getRootOfSimpleWord($wordInQuestion, array("N", "V"));
                if (!empty($root)) {
                    $questionWordsRootsArr[] = $root;
                }
            }
        }
    }
    //////////////////////////////////////////////
    $debugArr = array();
    //// Answering by similarity and relevance
    foreach ($scoringTable as $documentID => $documentScoreArr) {
        $SURA = $documentScoreArr['SURA'];
        $AYA = $documentScoreArr['AYA'];
        $TEXT = $QURAN_TEXT[$SURA][$AYA];
        $score = $documentScoreArr['SCORE'];
        if ($isEnglish) {
            $TEXT = strtolower($TEXT);
        }
        $verseConceptKeys = array_keys(getConceptsFoundInText($TEXT, $lang));
        /////////// COMMON CONCEPTS BETWEEN QUESTION AND VERSE TEXT
        $commonQuestionVerseConceptsCount = getIntersectionCountOfTwoArrays($questionConceptKeys, $verseConceptKeys);
        $debugArr[$documentID]['COMMON_CONCEPTS'] = $commonQuestionVerseConceptsCount;
        $debugArr[$documentID]['COMMON_CONCEPTS_LIST'] = join(" ", array_intersect($questionConceptKeys, $verseConceptKeys));
        $score += $commonQuestionVerseConceptsCount * $COMMON_CONCEPTS_FACTOR;
        /////////// CONCEPTS SHARED WITH THE QUESTION TYPE
        $numberOfSharedConceptsForThisQuestionType = getIntersectionCountOfTwoArrays($questionTypeConceptsArr, $verseConceptKeys);
        $score += $numberOfSharedConceptsForThisQuestionType * $COMMON_QUESTION_TYPE_CONCEPTS_FACTOR;
        $debugArr[$documentID]['COMMON_QUESTION_TYPE_CONCEPTS'] = $numberOfSharedConceptsForThisQuestionType;
        $debugArr[$documentID]['COMMON_QUESTION_TYPE_CONCEPTS_LIST'] = join(" ", array_intersect($questionTypeConceptsArr, $verseConceptKeys));
        //// QUESTION-VERSE SIMILARITY MEASUREMENT (WITH DERIVATIONS CONSIDERED)
        $wordsInVerseTextArr = explode(" ", $TEXT);
        $derivationHandledB4 = array();
        $commonDerivations = 0;
        // stays empty for English, where roots are not compared
        $verseWordsRootsArr = array();
        if ($isEnglish) {
            foreach ($taggedSignificantWords as $wordInQuestion => $pos) {
                // for words like "i" (tagged as a noun in the lexicon)
                if (mb_strlen($wordInQuestion) <= 2) {
                    continue;
                }
                if (!($pos == "VBN" || $pos == "VBD" || $pos == "VBG" || $pos == "NN" || $pos == "NNS")) {
                    continue;
                }
                foreach ($wordsInVerseTextArr as $wordInArray) {
                    $wordInArray = cleanAndTrim($wordInArray);
                    if (mb_strlen($wordInArray) <= 2) {
                        continue;
                    }
                    // a question noun/verb and a verse word match when one contains the other
                    $isSubstringMatch = strpos($wordInArray, $wordInQuestion) !== false || strpos($wordInQuestion, $wordInArray) !== false;
                    // each verse word is counted at most once
                    if ($isSubstringMatch && !isset($derivationHandledB4[$wordInArray])) {
                        $commonDerivations++;
                        $derivationHandledB4[$wordInArray] = 1;
                    }
                }
            }
        } else {
            foreach ($wordsInVerseTextArr as $wordInArray) {
                if (mb_strlen($wordInArray) <= 2) {
                    continue;
                }
                $root = getRootOfSimpleWord($wordInArray, array("N", "V"));
                if (!empty($root)) {
                    $verseWordsRootsArr[] = $root;
                }
            }
        }
        // recorded for every language (0 when not English) so the filter below can read it
        $score += $commonDerivations * $COMMON_DERIVATIONS_FACTOR;
        $debugArr[$documentID]['COMMON_DERIVATIONS'] = $commonDerivations;
        $commonRootsCount = getIntersectionCountOfTwoArrays($verseWordsRootsArr, $questionWordsRootsArr);
        $score += $commonRootsCount * $COMMON_ROOTS_FACTOR;
        $debugArr[$documentID]['COMMON_ROOTS'] = $commonRootsCount;
        /////////////////////////////////////////////////////////
        $documentScoreArr['SCORE'] = $score;
        $scoredAnswerVersesArr[$documentID] = $documentScoreArr;
    }
    rsortBy($scoredAnswerVersesArr, "SCORE");
    $scoredAnswerVersesArr = array_slice($scoredAnswerVersesArr, 0, 3);
    //// REMOVE ANY VERSE FROM THE FINAL LIST WHICH HAS NO OBVIOUS SIMILARITY WITH THE QUESTION
    foreach ($scoredAnswerVersesArr as $documentID => $verseArr) {
        // empty() treats a missing debug entry the same as a zero count
        $hasNoSimilarity = empty($debugArr[$documentID]['COMMON_ROOTS'])
            && empty($debugArr[$documentID]['COMMON_CONCEPTS'])
            && empty($debugArr[$documentID]['COMMON_QUESTION_TYPE_CONCEPTS'])
            && empty($debugArr[$documentID]['COMMON_DERIVATIONS']);
        if ($hasNoSimilarity) {
            unset($scoredAnswerVersesArr[$documentID]);
        }
    }
    /////////////////////////////////////
    return array("ANSWER_CONCEPTS" => $conceptsFromTaxRelations, "ANSWER_VERSES" => $scoredAnswerVersesArr);
}

示例#2

显示文件

文件： search.lib.php 项目： AhmedAMohamed/qurananalysis

/**
 * Builds the scoring table for a search: one entry per matching verse, keyed "SURA:AYA".
 *
 * Column search returns the requested verse (or every verse of the requested chapter)
 * with a fixed SCORE of 1 and no sorting. Otherwise every word of $extendedQueryWordsArr
 * is looked up in the inverted index, the verses found are scored, and the table is
 * passed to rsortBy() on SCORE before being returned.
 *
 * @param array  $extendedQueryWordsArr    Map of query word => target location; only the keys are used here.
 * @param string $query                    Query text, interpolated into the full-query regular expression.
 * @param bool   $isPhraseSearch           When true, normal words only count if the whole query occurs in the verse.
 * @param bool   $isQuestion               Not used by this function.
 * @param bool   $isColumnSearch           When true, $columnSearchKeyValParams selects a chapter/verse directly.
 * @param array  $columnSearchKeyValParams 'KEY' = 1-based chapter number, 'VAL' = 1-based verse number or "ALL".
 * @param bool   $isConceptSearch          Not used by this function.
 * @param string $lang                     Language code; "AR" selects pause-mark removal.
 * @param bool   $isTransliterationSearch  Not used for scoring; both code paths record the word the same way.
 *
 * @return array Scoring table; an empty array when nothing matched.
 */
function getScoredDocumentsFromInveretdIndex($extendedQueryWordsArr, $query, $isPhraseSearch, $isQuestion, $isColumnSearch, $columnSearchKeyValParams, $isConceptSearch, $lang, $isTransliterationSearch)
{
    // always defined, so an empty result is an empty array instead of an undefined variable
    $scoringTable = array();
    $QURAN_TEXT = getModelEntryFromMemory($lang, "MODEL_CORE", "QURAN_TEXT", "");
    if ($isColumnSearch) {
        $SURA = $columnSearchKeyValParams['KEY'] - 1;
        if ($columnSearchKeyValParams['VAL'] == "ALL") {
            // an unknown chapter yields no verses
            $suraSize = isset($QURAN_TEXT[$SURA]) ? count($QURAN_TEXT[$SURA]) : 0;
            for ($AYA = 0; $AYA < $suraSize; $AYA++) {
                $scoringTable[$SURA . ":" . $AYA] = array('SCORE' => 1, 'SURA' => $SURA, 'AYA' => $AYA);
            }
            return $scoringTable;
        }
        $AYA = $columnSearchKeyValParams['VAL'] - 1;
        // VERSE VALIDITY CHECK
        $qacLocation = getQACLocationStr($SURA + 1, $AYA + 1, 0);
        $verseText = getVerseByQACLocation($QURAN_TEXT, $qacLocation);
        if (empty($verseText)) {
            return array();
        }
        $scoringTable[$SURA . ":" . $AYA] = array('SCORE' => 1, 'SURA' => $SURA, 'AYA' => $AYA);
        return $scoringTable;
    }
    // NOTE(review): $query and $word are interpolated into regular expressions unescaped;
    // confirm callers strip regex metacharacters, or consider preg_quote().
    $fullQueryPattern = "/(^|[ ]){$query}([ ]|\$)/umi";
    /**
     * GET ALL RESULTS FROM INDEX USING EXTENDED QUERY WORDS
     */
    foreach ($extendedQueryWordsArr as $word => $targetQACLocation) {
        $invertedIndexEntry = getModelEntryFromMemory($lang, "MODEL_SEARCH", "INVERTED_INDEX", $word);
        // words without any index entry contribute nothing
        if (empty($invertedIndexEntry)) {
            continue;
        }
        foreach ($invertedIndexEntry as $documentArrInIndex) {
            $SURA = $documentArrInIndex['SURA'];
            $AYA = $documentArrInIndex['AYA'];
            $INDEX_IN_AYA_EMLA2Y = $documentArrInIndex['INDEX_IN_AYA_EMLA2Y'];
            $INDEX_IN_AYA_UTHMANI = $documentArrInIndex['INDEX_IN_AYA_UTHMANI'];
            $WORD_TYPE = $documentArrInIndex['WORD_TYPE'];
            $EXTRA_INFO = $documentArrInIndex['EXTRA_INFO'];
            $docKey = $SURA . ":" . $AYA;
            $qacLocation = getQACLocationStr($SURA + 1, $AYA + 1, $INDEX_IN_AYA_UTHMANI);
            $verseText = getVerseByQACLocation($QURAN_TEXT, $qacLocation);
            /*
             * NOTE: A DECISION SHOULD BE TAKEN TO SEARCH AROUND AND REMOVE PAUSE MARKS OR NOT
             */
            if ($lang == "AR") {
                $verseTextWithoutPauseMarks = removePauseMarkFromVerse($verseText);
            } else {
                $verseTextWithoutPauseMarks = removeSpecialCharactersFromMidQuery($verseText);
            }
            $fullQueryIsFoundInVerseCount = preg_match_all($fullQueryPattern, $verseTextWithoutPauseMarks);
            if ($isPhraseSearch && $WORD_TYPE != "PRONOUN_ANTECEDENT") {
                // phrase search: the verse only counts when the whole query occurs in it
                $numberOfOccurencesForWord = $fullQueryIsFoundInVerseCount;
                if ($numberOfOccurencesForWord == 0) {
                    continue;
                }
            } else {
                $numberOfOccurencesForWord = preg_match_all("/{$word}/um", $verseText);
            }
            // first hit for this verse: create its entry
            if (!isset($scoringTable[$docKey])) {
                $scoringTable[$docKey] = array(
                    'SCORE' => 0,
                    'FREQ' => 0,
                    'DISTANCE' => 0,
                    'WORD_OCCURENCES_COUNT' => 0,
                    'QUERY_WORDS_IN_VERSE' => 0,
                    'IS_FULL_QUERY_IN_VERSE' => 0,
                    'SURA' => $SURA,
                    'AYA' => $AYA,
                    'POSSIBLE_HIGHLIGHTABLE_WORDS' => array(),
                    'WORD_TYPE' => $WORD_TYPE,
                    'EXTRA_INFO' => $EXTRA_INFO,
                    'INDEX_IN_AYA_EMLA2Y' => $INDEX_IN_AYA_EMLA2Y,
                    'INDEX_IN_AYA_UTHMANI' => $INDEX_IN_AYA_UTHMANI,
                    'PRONOUNS' => array(),
                );
            }
            $scoringTable[$docKey]['WORD_OCCURENCES_COUNT'] = $numberOfOccurencesForWord;
            //TODO: seems duplicate of WORD_OCCURENCES_COUNT
            // every index hit raises the frequency of its verse by one
            $scoringTable[$docKey]['FREQ']++;
            // STEM or PRONOUN
            if ($WORD_TYPE == "PRONOUN_ANTECEDENT") {
                $scoringTable[$docKey]['PRONOUNS'][$EXTRA_INFO] = $INDEX_IN_AYA_EMLA2Y;
            } elseif ($WORD_TYPE == "ROOT" || $WORD_TYPE == "LEM") {
                // for non-normal words this will get the whole segment
                $scoringTable[$docKey]['POSSIBLE_HIGHLIGHTABLE_WORDS'][$word] = $WORD_TYPE;
                // needed to fix roots that are sometimes converted by the uthmani/simple map below
                $scoringTable[$docKey]['POSSIBLE_HIGHLIGHTABLE_WORDS'][removeTashkeel($EXTRA_INFO)] = $WORD_TYPE;
                // try to convert the QAC uthmani word to simple imla2y using the map table,
                // with and without tashkeel, falling back to the stripped word itself
                $wordInAya = getItemFromUthmaniToSimpleMappingTable($EXTRA_INFO);
                if (empty($wordInAya)) {
                    $wordInAya = getItemFromUthmaniToSimpleMappingTable(removeTashkeel($EXTRA_INFO));
                }
                if (empty($wordInAya)) {
                    $wordInAya = removeTashkeel($EXTRA_INFO);
                }
                $scoringTable[$docKey]['POSSIBLE_HIGHLIGHTABLE_WORDS'][$wordInAya] = $WORD_TYPE;
            } else {
                $scoringTable[$docKey]['POSSIBLE_HIGHLIGHTABLE_WORDS'][$word] = $WORD_TYPE;
            }
            $scoringTable[$docKey]['IS_FULL_QUERY_IN_VERSE'] = $fullQueryIsFoundInVerseCount;
            $scoringTable[$docKey]['QUERY_WORDS_IN_VERSE'] = count($scoringTable[$docKey]['POSSIBLE_HIGHLIGHTABLE_WORDS']);
            $scoringTable[$docKey]['SCORE'] = $scoringTable[$docKey]['FREQ'] / 2
                + $scoringTable[$docKey]['DISTANCE'] * 1
                + $scoringTable[$docKey]['QUERY_WORDS_IN_VERSE'] * 10
                + count($scoringTable[$docKey]['PRONOUNS']) * 1
                + $scoringTable[$docKey]['WORD_OCCURENCES_COUNT'] * 1
                + $scoringTable[$docKey]['IS_FULL_QUERY_IN_VERSE'] * 20;
        }
    }
    rsortBy($scoringTable, 'SCORE');
    return $scoringTable;
}

示例#3

显示文件

文件： model.loader.php 项目： AhmedAMohamed/qurananalysis

function loadModel($lang, $type, $file)
{
    global $WORDS_FREQUENCY_ARR, $TOTALS_ARR, $MODEL_CORE, $MODEL_SEARCH, $MODEL_QAC, $MODEL_QURANA;
    global $sajdahMark, $saktaLatifaMark, $pauseMarksFile, $serializedModelFile, $basmalaTextUthmani;
    global $numberOfSuras, $numberOfVerses, $quranMetaDataFile, $arabicStopWordsFile, $englishStopWordsFile;
    global $META_DATA, $basmalaText, $englishResourceFile, $arabicResourceFile, $quranCorpusMorphologyFile;
    global $quranaPronounResolutionConceptsFile, $quranaPronounResolutionDataFileTemplate, $quranFileUthmaniAR;
    global $TRANSLATION_MAP_EN_TO_AR, $TRANSLATION_MAP_AR_TO_EN, $TRANSLITERATION_WORDS_MAP, $TRANSLITERATION_VERSES_MAP;
    global $basmalaTextUthmani2, $arabicStopWordsFileL2;
    global $TRANSLITERATION_WORDS_LOCATION_MAP;
    $QURAN_TEXT = array();
    $invertedIndexBatchApcArr = array();
    $qacMasterTableBatchApcArr = array();
    $qacPOSTableBatchApcArr = array();
    $qacFeatureTableBatchApcArr = array();
    $TOTALS_ARR = array();
    $TOTALS_ARR['CHARS'] = 0;
    $TOTALS_ARR['WORDS'] = 0;
    $TOTALS_ARR['NRWORDS'] = 0;
    $TOTALS_ARR['VERSES'] = 0;
    $TOTALS_ARR['SURAS'] = $numberOfSuras;
    $TOTALS_ARR['CHAPTERS'] = 30;
    $TOTALS_ARR['TOTAL_PER_SURA'] = array();
    $TOTALS_ARR['SAJDAT_TELAWA'] = array();
    $TOTALS_ARR['PAUSEMARKS'] = array();
    $TOTALS_ARR['MIN_WORD_LENGTH'] = 0;
    $TOTALS_ARR['AVG_WORD_LENGTH'] = 0;
    $TOTALS_ARR['MAX_WORD_LENGTH'] = 0;
    $TOTALS_ARR['MIN_WORD'] = null;
    $TOTALS_ARR['MAX_WORD'] = null;
    $TOTALS_ARR['MIN_VERSE_LENGTH'] = 0;
    $TOTALS_ARR['AVG_VERSE_LENGTH'] = 0;
    $TOTALS_ARR['MAX_VERSE_LENGTH'] = 0;
    $TOTALS_ARR['MIN_VERSE'] = null;
    $TOTALS_ARR['MAX_VERSE'] = null;
    $TOTALS_ARR['SAJDAT_TELAWA']['COUNT'] = 0;
    $TOTALS_ARR['SAJDAT_TELAWA']['VERSES'] = array();
    $TOTALS_ARR['SAKTA_LATIFA']['COUNT'] = 0;
    $TOTALS_ARR['SAKTA_LATIFA']['VERSES'] = array();
    $INVERTED_INDEX = array();
    $WORDS_FREQUENCY_ARR = array();
    $WORDS_FREQUENCY_ARR['WORDS'] = array();
    $WORDS_FREQUENCY_ARR['WORDS_PER_SURA'] = array();
    $WORDS_FREQUENCY_ARR['TOTAL_PER_VERSE'] = array();
    $WORDS_FREQUENCY_ARR['WORDS_TFIDF'] = array();
    $WORDS_FREQUENCY_ARR['VERSE_ENDINGS'] = array();
    $WORDS_FREQUENCY_ARR['VERSE_BEGINNINGS'] = array();
    /** WORD LENGTH **/
    $minWordLength = 1000;
    $minWord = null;
    $maxWordLength = -1;
    $maxWord = null;
    $avgWordLength = 0;
    /** VERSE LENGTH **/
    $minVerseLength = 1000;
    $minVerse = null;
    $maxVerseLength = -1;
    $maxVerse = null;
    $avgVerseLength = 0;
    /** QAC Model **/
    // Master model, contains all QAC data
    $qacMasterSegmentTable = array();
    //pinters/indexes on the master table for POS and features
    $qacPOSTable = array();
    $qacFeaturesTable = array();
    //$qacWordsTable = array();
    $qacSegmentToWordTable = array();
    /** QURANA Corpus **/
    $quranaConcecpts = array();
    $quranaResolvedPronouns = array();
    ########### LOAD DATA ACCORDING TO MODEL SOURCE TYPE
    if ($type == "XML") {
        $sourceContent = simplexml_load_file($file);
    } else {
        $sourceContent = file($file, FILE_SKIP_EMPTY_LINES | FILE_IGNORE_NEW_LINES);
    }
    if ($type == "TXT") {
        for ($s = 0; $s < $numberOfVerses; $s++) {
            $line = $sourceContent[$s];
            $lineArr = preg_split("/\\|/", $line);
            $suraIndex = $lineArr[0];
            $ayaIndex = $lineArr[1];
            $text = $lineArr[2];
            //strip "besm allah alrahman al raheem" from furst aya of all suras except the first one
            if (strpos($lang, "AR") !== false && $ayaIndex == 1 && $s != 0) {
                if ($lang == "AR") {
                    $text = trim(str_replace($basmalaText, "", $text));
                } else {
                    if ($lang == "AR_UTH") {
                        $text = trim(str_replace($basmalaTextUthmani, "", $text));
                        $text = trim(str_replace($basmalaTextUthmani2, "", $text));
                    }
                }
            }
            if (!isset($QURAN_TEXT[$suraIndex - 1])) {
                $QURAN_TEXT[$suraIndex - 1] = array();
            }
            $QURAN_TEXT[$suraIndex - 1][$ayaIndex - 1] = $text;
        }
    } else {
        if ($type == "XML") {
            for ($s = 0; $s < $numberOfSuras; $s++) {
                $suraSize = $META_DATA['SURAS'][$s]['ayas'];
                for ($a = 0; $a < $suraSize; $a++) {
                    $QURAN_TEXT[$s][$a] = (string) $sourceContent->sura[$s]->aya[$a]['text'];
                }
            }
        } else {
            throw new Exception("Invalid Source Type ({$type})");
        }
    }
    ##############################################################
    // free resources
    $sourceContent = null;
    unset($sourceContent);
    if ($lang == "AR") {
        ############ LOAD QAC (Quranic Arabic Corpus) FILE ###################################
        //dont skip new lines here (FILE_SKIP_EMPTY_LINES) for the skipping "57" condition below to work
        $qacFileLinesArr = file($quranCorpusMorphologyFile, FILE_IGNORE_NEW_LINES);
        $rootsLookupArray = array();
        $headerIndex = 0;
        $segmentIndex = 1;
        foreach ($qacFileLinesArr as $line) {
            $headerIndex++;
            //ignore header sections
            if ($headerIndex <= 57) {
                continue;
            }
            //if ( $segmentIndex >= 2) exit;
            //echoN($line);
            // convert columns to array
            $lineArr = preg_split("/\t/", $line);
            $location = $lineArr[0];
            $formOrSegment = $lineArr[1];
            $posTAG = $lineArr[2];
            $featuresList = $lineArr[3];
            //preprint_r($lineArr);
            // remove brackets from location and keep it only SURA/AYA/WORDINDEX/SEGMENTINDEX
            $masterID = preg_replace("/\\(|\\)|/", "", $location);
            $locationArr = preg_split("/\\:/", $masterID);
            $wordSegmentID = $locationArr[count($locationArr) - 1];
            $wordIndex = $locationArr[count($locationArr) - 2];
            $verseID = $locationArr[count($locationArr) - 3];
            $suraID = $locationArr[count($locationArr) - 4];
            // Remove segment index from location ( will be added as new array below )
            $masterID = substr($masterID, 0, strlen($masterID) - 2);
            // get the reversed buackwalter transliteration for the segment
            $formOrSegmentReverseTransliterated = buckwalterReverseTransliteration($formOrSegment);
            //echoN($formOrSegmentReverseTransliterated);
            // separate features
            $featuresTempArr = preg_split("/\\|/", $featuresList);
            //preprint_r($featuresTempArr);
            $featuresArr = array();
            foreach ($featuresTempArr as $oneFeature) {
                // feature is a key/value set
                if (strpos($oneFeature, ":") !== false) {
                    $oneFeatureKeyValueArr = preg_split("/\\:/", $oneFeature);
                    $featureName = $oneFeatureKeyValueArr[0];
                    $featureValue = $oneFeatureKeyValueArr[1];
                    if ($featureName == "LEM" || $featureName == "ROOT") {
                        //echoN($featureValue);
                        $featureValue = buckwalterReverseTransliteration($featureValue);
                    }
                } else {
                    $featureName = $oneFeature;
                    // 1 here just a dummy value
                    $featureValue = -1;
                }
                $featureValue = trim($featureValue);
                // fill Features Index table
                //$qacFeaturesTable[$featureName][$masterID]= $featureValue;
                $apcMemoryEntryKey = "{$lang}/MODEL_QAC/QAC_FEATURES/{$featureName}";
                $qacFeatureTableBatchApcArr[$apcMemoryEntryKey][$masterID] = $featureValue;
                $featuresArr[$featureName] = $featureValue;
                // non-word features should not be included
                if ($featureName == "LEM" || $featureName == "ROOT") {
                    addToInvertedIndex($invertedIndexBatchApcArr, $lang, trim($featureValue), $suraID - 1, $verseID - 1, $wordIndex, trim($featureName), $formOrSegmentReverseTransliterated);
                    if ($featureName == "ROOT") {
                        //$rootsLookupArray[$formOrSegmentReverseTransliterated]=$featureValue;
                        addValueToMemoryModel($lang, "MODEL_QAC", "QAC_ROOTS_LOOKUP", $formOrSegmentReverseTransliterated, $featureValue);
                    }
                }
            }
            //location significant before increment below
            $qacSegmentToWordTable[$segmentIndex] = $wordIndex;
            // Fill master table
            //$qacMasterSegmentTable[$masterID][]
            $qacMasterTableEntry = array("FORM_EN" => $formOrSegment, "FORM_AR" => $formOrSegmentReverseTransliterated, "TAG" => $posTAG, "SEGMENT_INDEX" => $segmentIndex++, "FEATURES" => $featuresArr);
            $apcMemoryEntryKey = "{$lang}/MODEL_QAC/QAC_MASTERTABLE/{$masterID}";
            $qacMasterTableBatchApcArr[$apcMemoryEntryKey][] = $qacMasterTableEntry;
            // Fill Part of Speech tagging table
            $qacPOSTable[$posTAG][$masterID] = $wordSegmentID;
            $apcMemoryEntryKey = "{$lang}/MODEL_QAC/QAC_POS/{$posTAG}";
            $qacPOSTableBatchApcArr[$apcMemoryEntryKey][$masterID] = $wordSegmentID;
        }
        //preprint_r($qacMasterSegmentTable);
        //preprint_r($qacFeaturesTable);
        //preprint_r($qacPOSTable);
        ##############################################################
        // free resources
        $qacFileLinesArr = null;
        unset($qacFileLinesArr);
        // need to fluch tabel in memory since it is needed by Qurana - in segment function
        addToMemoryModelBatch($qacMasterTableBatchApcArr);
    }
    ######### Qurana Pronomial Anaphone Corpus ###################
    //echoN($quranaPronounResolutionConceptsFile);
    // GET XML FILE CONTENT
    $xmlContent = file_get_contents($quranaPronounResolutionConceptsFile);
    // LOAD XML OBJECT - trim used to avoid first line empty error
    $concepts = simplexml_load_string(trim(stripHTMLComments($xmlContent)));
    // LOAD CONCEPTS
    foreach ($concepts->con as $index => $conceptObj) {
        $conceptID = (string) $conceptObj['id'];
        $conceptNameEN = (string) $conceptObj->english;
        $conceptNameAR = (string) $conceptObj->arabic;
        $quranaConcecpts[$conceptID] = array("EN" => trim($conceptNameEN), "AR" => trim($conceptNameAR), "FREQ" => 0);
    }
    $pronounsCount = 0;
    $segmentsCount = 0;
    //preprint_r($quranaConcecpts);
    // LOAD PRONOUNS // load & parse the file of each SURA and load it in the model
    for ($s = 0; $s < $numberOfSuras; $s++) {
        $suraID = $s + 1;
        $pronounDataFileName = preg_replace("/%s/", $suraID, $quranaPronounResolutionDataFileTemplate);
        //echoN($pronounDataFileName);
        // GET XML FILE CONTENT of the current SURA by customizing file name
        $xmlContent = file_get_contents($pronounDataFileName);
        // LOAD XML OBJECT - trim used to avoid first line empty error
        $chapter = simplexml_load_string(trim(stripHTMLComments($xmlContent)));
        // LOAD CONCEPTS
        foreach ($chapter->verse as $index => $verseObj) {
            $verseLocalSegmentIndex = 0;
            $versesCount++;
            // Loop on all children
            foreach ($verseObj->children() as $index => $childObj) {
                // get tag name
                $tagName = $childObj->getName();
                $verseLocalSegmentIndex++;
                $segmentsCount++;
                // we got a prounoun tag
                if ($tagName == "pron") {
                    $pronounsCount++;
                    // get the verse including this pronoun
                    $verseID = (string) $verseObj['id'];
                    // get pronoun concept ID and antecendent
                    $conceptID = (string) $childObj['con'];
                    $pronounAntecedent = (string) $childObj['ant'];
                    // get segment ID and word form
                    $quranaSegmentID = (string) $childObj->seg['id'];
                    $quranaSegmentForm = (string) $childObj->seg->__toString();
                    $quranaSegmentForm = trim($quranaSegmentForm);
                    // convert Qurana Segment ID to QAC segment for cross-referencing
                    $qacSegment = getQACSegmentByQuranaSeqment($suraID, $verseID, $verseLocalSegmentIndex, $quranaSegmentForm);
                    //echo("$qacSegment,$quranaSegmentID\n");
                    // get the id of the word where the segment is
                    $wordId = $qacSegmentToWordTable[$qacSegment];
                    $quranaConcecpts[$conceptID]["FREQ"]++;
                    // fill pronouns array
                    $quranaResolvedPronouns["{$suraID}:{$verseID}:{$wordId}"][] = array("CONCEPT_ID" => $conceptID, "SEGMENT_INDEX" => $qacSegment, "ANTECEDENT_SEGMENTS" => preg_split("/ /", $pronounAntecedent));
                    if ($lang == "EN") {
                        addToInvertedIndex($invertedIndexBatchApcArr, $lang, strtolower($quranaConcecpts[$conceptID]['EN']), $suraID - 1, $verseID - 1, $wordId, "PRONOUN_ANTECEDENT", $quranaSegmentForm);
                    } else {
                        addToInvertedIndex($invertedIndexBatchApcArr, $lang, $quranaConcecpts[$conceptID]['AR'], $suraID - 1, $verseID - 1, $wordId, "PRONOUN_ANTECEDENT", $quranaSegmentForm);
                    }
                }
            }
        }
    }
    //echoN("SEG:$segmentsCount PRON:$pronounsCount");
    //preprint_r($quranaResolvedPronouns);
    //preprint_r($INVERTED_INDEX);exit;
    ##############################################################
    // free resources
    $xmlContent = null;
    $concepts = null;
    unset($xmlContent);
    unset($concepts);
    //echo preprint_r($QURAN_TEXT);;
    if (strpos($lang, "AR") !== false) {
        $stopWordsArr = getStopWordsArrByFile($arabicStopWordsFile);
        $stopWordsStrictL2Arr = getStopWordsArrByFile($arabicStopWordsFileL2);
        $pauseMarksArr = getPauseMarksArrByFile($pauseMarksFile);
    } else {
        $stopWordsArr = getStopWordsArrByFile($englishStopWordsFile);
        $pauseMarksArr = array();
    }
    //preprint_r($stopWordsArr);
    //preprint_r($pauseMarksArr);
    if (strpos($lang, "AR") !== false) {
        // SETTING PAUSE MARKS COUNTER ARRAY
        foreach ($pauseMarksArr as $pauseMark => $constant) {
            $TOTALS_ARR['PAUSEMARKS'][$pauseMark] = 0;
        }
    }
    /* SURA'S LOOP **/
    for ($s = 0; $s < $numberOfSuras; $s++) {
        $TOTALS_ARR['TOTAL_PER_SURA'][$s] = array();
        $suraNameLang = $lang;
        if ($suraNameLang == "AR_UTH") {
            $suraNameLang = "AR";
        }
        $suraNameLang = strtolower($lang);
        $TOTALS_ARR['TOTAL_PER_SURA'][$s]['NAME'] = $META_DATA['SURAS'][$s]['name_' . $suraNameLang];
        $TOTALS_ARR['TOTAL_PER_SURA'][$s]['CHARS'] = 0;
        $TOTALS_ARR['TOTAL_PER_SURA'][$s]['NRWORDS'] = 0;
        $TOTALS_ARR['TOTAL_PER_SURA'][$s]['WORDS'] = 0;
        $TOTALS_ARR['TOTAL_PER_SURA'][$s]['VERSES'] = 0;
        $WORDS_FREQUENCY_ARR['WORDS_PER_SURA'][$s] = array();
    }
    /* SURA'S LOOP **/
    for ($s = 0; $s < $numberOfSuras; $s++) {
        //echoN($quranXMLObj->sura[$s]['name']);
        $suraSize = $META_DATA['SURAS'][$s]['ayas'];
        /* VERSES LOOP **/
        for ($a = 0; $a < $suraSize; $a++) {
            //$verseText =
            $verseText = $QURAN_TEXT[$s][$a];
            //echoN("- ".$verseText);
            $wordsArr = preg_split("/ /", $verseText);
            /** CALCULATE VERSE LENGTH **/
            $wordsInVerseIncludingPauses = count($wordsArr);
            $wordsInVerse = $wordsInVerseIncludingPauses - count(array_intersect($wordsArr, array_keys($pauseMarksArr)));
            if ($wordsInVerse >= $maxVerseLength) {
                $maxVerseLength = $wordsInVerse;
                $maxVerse = $verseText;
            }
            if ($wordsInVerse <= $minWordLength) {
                if ($wordsInVerse == $minWordLength) {
                    if (mb_strlen($verseText) < mb_strlen($minVerse)) {
                        $minVerseLength = $wordsInVerse;
                        $minVerse = $verseText;
                    }
                } else {
                    $minVerseLength = $wordsInVerse;
                    $minVerse = $verseText;
                }
            }
            $avgVerseLength += $wordsInVerse;
            /** END CALCULATE VERSE LENGTH **/
            $wordIndex = 0;
            /* WORDS IN VERSE  LOOP **/
            foreach ($wordsArr as $word) {
                $word = trim($word);
                // PAUSE MARK
                if (strpos($lang, "AR") !== false && isset($pauseMarksArr[$word])) {
                    $TOTALS_ARR['PAUSEMARKS'][$word]++;
                    continue;
                } else {
                    // SAJDAH MARK
                    if ($word == $sajdahMark) {
                        $TOTALS_ARR['SAJDAT_TELAWA']['COUNT']++;
                        $TOTALS_ARR['SAJDAT_TELAWA']['VERSES'][] = array($s, $a, $verseText);
                        continue;
                    } else {
                        // SAKTA LATIFA
                        if ($word == $saktaLatifaMark) {
                            $TOTALS_ARR['SAKTA_LATIFA']['COUNT']++;
                            $TOTALS_ARR['SAKTA_LATIFA']['VERSES'][] = array($s, $a, $verseText);
                            continue;
                        }
                    }
                }
                // Mainly for english translations
                if ($lang == "EN") {
                    $word = strtolower(cleanAndTrim($word));
                }
                // ignore empty words - result of trimming
                if (empty($word)) {
                    // the case of " - " in english translations
                    continue;
                }
                $wordIndex++;
                if ($wordIndex == 1) {
                    if (!isset($WORDS_FREQUENCY_ARR['VERSE_BEGINNINGS'][$word])) {
                        $WORDS_FREQUENCY_ARR['VERSE_BEGINNINGS'][$word] = 0;
                    }
                    $WORDS_FREQUENCY_ARR['VERSE_BEGINNINGS'][$word]++;
                } else {
                    if ($wordIndex == count($wordsArr)) {
                        if (!isset($WORDS_FREQUENCY_ARR['VERSE_ENDINGS'][$word])) {
                            $WORDS_FREQUENCY_ARR['VERSE_ENDINGS'][$word] = 0;
                        }
                        $WORDS_FREQUENCY_ARR['VERSE_ENDINGS'][$word]++;
                    }
                }
                $TOTALS_ARR['WORDS']++;
                if (!isset($WORDS_FREQUENCY_ARR['TOTAL_PER_VERSE'][$s])) {
                    $WORDS_FREQUENCY_ARR['TOTAL_PER_VERSE'][$s] = array();
                }
                if (!isset($WORDS_FREQUENCY_ARR['TOTAL_PER_VERSE'][$s][$a])) {
                    $WORDS_FREQUENCY_ARR['TOTAL_PER_VERSE'][$s][$a] = array();
                }
                if (!isset($WORDS_FREQUENCY_ARR['TOTAL_PER_VERSE'][$s][$a][$word])) {
                    $WORDS_FREQUENCY_ARR['TOTAL_PER_VERSE'][$s][$a][$word] = 0;
                }
                $WORDS_FREQUENCY_ARR['TOTAL_PER_VERSE'][$s][$a][$word]++;
                if (!isset($WORDS_FREQUENCY_ARR['WORDS'][$word])) {
                    $WORDS_FREQUENCY_ARR['WORDS'][$word] = 0;
                }
                $WORDS_FREQUENCY_ARR['WORDS'][$word]++;
                $TOTALS_ARR['TOTAL_PER_SURA'][$s]['WORDS']++;
                if (!isset($WORDS_FREQUENCY_ARR['WORDS_PER_SURA'][$s][$word])) {
                    $WORDS_FREQUENCY_ARR['WORDS_PER_SURA'][$s][$word] = 0;
                }
                $WORDS_FREQUENCY_ARR['WORDS_PER_SURA'][$s][$word]++;
                //if (!isset($INVERTED_INDEX[$word]) ) $INVERTED_INDEX[$word] = array();
                //$INVERTED_INDEX[$word][] = array("SURA"=>$s,"AYA"=>$a,"INDEX_IN_AYA_EMLA2Y"=>$wordIndex,"WORD_TYPE"=>"NORMAL_WORD");
                addToInvertedIndex($invertedIndexBatchApcArr, $lang, $word, $s, $a, $wordIndex, "NORMAL_WORD");
                /** CALCULATE WORD LENGTH **/
                $wordLength = mb_strlen($word);
                if ($wordLength >= $maxWordLength) {
                    $maxWordLength = $wordLength;
                    $maxWord = $word;
                }
                if ($wordLength <= $minWordLength) {
                    $minWordLength = $wordLength;
                    $minWord = $word;
                }
                $avgWordLength += $wordLength;
                /** END CALCULATE WORD LENGTH **/
                $charsInWordArr = preg_split("//u", $word, -1, PREG_SPLIT_NO_EMPTY);
                /* CHARS IN EACH WORD  LOOP **/
                foreach ($charsInWordArr as $char) {
                    //echoN($char." ".in_array($char,$pauseMarksArrTemp));
                    // SPACE
                    if ($char == " ") {
                        continue;
                    }
                    $TOTALS_ARR['CHARS']++;
                    $TOTALS_ARR['TOTAL_PER_SURA'][$s]['CHARS']++;
                }
            }
            $TOTALS_ARR['VERSES']++;
            $TOTALS_ARR['TOTAL_PER_SURA'][$s]['VERSES']++;
            // 					  			if ( $TOTALS_ARR['VERSES']>30)
            // 					  				exit;
        }
        /** END AYA's LOOP **/
    }
    /** END SURA's LOOP **/
    /* SURA'S LOOP **/
    for ($s = 0; $s < $numberOfSuras; $s++) {
        $TOTALS_ARR['TOTAL_PER_SURA'][$s]['NRWORDS'] = count($WORDS_FREQUENCY_ARR['WORDS_PER_SURA'][$s]);
        arsort($WORDS_FREQUENCY_ARR['WORDS_PER_SURA'][$s]);
    }
    $TOTALS_ARR['NRWORDS'] = count($WORDS_FREQUENCY_ARR['WORDS']);
    $TOTALS_ARR['PAUSEMARKS_AGGREGATION'] = 0;
    // AGGREGATE PAUSE MARKS
    foreach ($TOTALS_ARR['PAUSEMARKS'] as $pmLabel => $pmCount) {
        //echo $pmLabel.$pmCount;
        $TOTALS_ARR['PAUSEMARKS_AGGREGATION'] += $pmCount;
    }
    /**
     * CALCULATING TF-IDF TABLE
     */
    foreach ($WORDS_FREQUENCY_ARR['WORDS'] as $wordLabel => $wordFreq) {
        $termFrequency = $wordFreq;
        $termFrequencyPercentage = $termFrequency / $TOTALS_ARR['WORDS'] * 100;
        // DOCUMENT = VERSE
        $documentFrequency = 0;
        $inverseDocumentFrequency = 0;
        //CHECKING VERSES
        for ($s = 0; $s < $numberOfSuras; $s++) {
            //$versesPerSura = $TOTALS_ARR['TOTAL_PER_SURA'][$s]['VERSES'];
            //for ($a=0;$a<$versesPerSura;$a++)
            //{
            if (isset($WORDS_FREQUENCY_ARR['WORDS_PER_SURA'][$s][$wordLabel])) {
                //= $WORDS_FREQUENCY_ARR['TOTAL_PER_VERSE'][$s][$a][$wordLabel]
                $documentFrequency++;
            }
            //}
        }
        $inverseDocumentFrequency = log($numberOfSuras / $documentFrequency, 10);
        $TFIDF = $termFrequency * $inverseDocumentFrequency;
        //echoN("WORD:$wordLabel PRCG:$termFrequencyPercentage TF:$termFrequency DF:$documentFrequency IDF:$inverseDocumentFrequency TFIDF:$TFIDF ");
        $WORDS_FREQUENCY_ARR['WORDS_TFIDF'][$wordLabel] = array("TF" => $termFrequency, "TPC" => $termFrequencyPercentage, "DF" => $documentFrequency, "IDF" => $inverseDocumentFrequency, "TFIDF" => $TFIDF);
    }
    /** END OF TFIDF TABLE **/
    rsortBy($WORDS_FREQUENCY_ARR['WORDS_TFIDF'], 'TF');
    //preprint_r($WORDS_FREQUENCY_ARR['WORDS_TFIDF']);
    /** Continuing  WORD/VERSE LENGTH CALCULATE **/
    $avgWordLength = $avgWordLength / $TOTALS_ARR['WORDS'];
    $avgVerseLength = $avgVerseLength / $TOTALS_ARR['VERSES'];
    /*
    echoN($minWordLength." - ".$minWord);
    echoN($maxWordLength." - ".$maxWord);
    echoN($avgWordLength);
    
    echoN($minVerseLength." - ".$minVerse);
    echoN($maxVerseLength." - ".$maxVerse);
    echoN($avgVerseLength);
    */
    $TOTALS_ARR['MIN_WORD_LENGTH'] = $minWordLength;
    $TOTALS_ARR['AVG_WORD_LENGTH'] = round($avgWordLength, 2);
    $TOTALS_ARR['MAX_WORD_LENGTH'] = $maxWordLength;
    $TOTALS_ARR['MIN_WORD'] = $minWord;
    $TOTALS_ARR['MAX_WORD'] = $maxWord;
    $TOTALS_ARR['MIN_VERSE_LENGTH'] = $minVerseLength;
    $TOTALS_ARR['AVG_VERSE_LENGTH'] = round($avgVerseLength, 2);
    $TOTALS_ARR['MAX_VERSE_LENGTH'] = $maxVerseLength;
    $TOTALS_ARR['MIN_VERSE'] = $minVerse;
    $TOTALS_ARR['MAX_VERSE'] = $maxVerse;
    /** end CALCULATE WORD/VERSE LENGTH **/
    //exit;;
    arsort($WORDS_FREQUENCY_ARR['WORDS']);
    arsort($WORDS_FREQUENCY_ARR['VERSE_BEGINNINGS']);
    arsort($WORDS_FREQUENCY_ARR['VERSE_ENDINGS']);
    //preprint_r($WORDS_FREQUENCY_ARR);
    /////// LOADING LANGUAGE RESOURCE FILES
    $resourceFile = $englishResourceFile;
    if (strpos($lang, "AR") !== false) {
        $resourceFile = $arabicResourceFile;
    }
    $languageResourcesArr = file($resourceFile, FILE_SKIP_EMPTY_LINES | FILE_IGNORE_NEW_LINES);
    $RESOURCES = array();
    foreach ($languageResourcesArr as $index => $resourceLine) {
        $resourcePairsArr = preg_split("/\\|/", $resourceLine);
        $resourceID = $resourcePairsArr[0];
        $resourceValue = $resourcePairsArr[1];
        $RESOURCES[$resourceID] = $resourceValue;
    }
    //$MODEL_CORE['LOADED']=1;
    //$MODEL_CORE[$lang]['META_DATA'] = $META_DATA;
    addValueToMemoryModel($lang, "MODEL_CORE", "META_DATA", "", $META_DATA);
    //$MODEL_CORE[$lang]['TOTALS'] = $TOTALS_ARR;
    addValueToMemoryModel($lang, "MODEL_CORE", "TOTALS", "", $TOTALS_ARR);
    //$MODEL_CORE[$lang]['WORDS_FREQUENCY'] = $WORDS_FREQUENCY_ARR;
    addValueToMemoryModel($lang, "MODEL_CORE", "WORDS_FREQUENCY", "", $WORDS_FREQUENCY_ARR);
    addValueToMemoryModel($lang, "MODEL_CORE", "WORDS_FREQUENCY", "WORDS", $WORDS_FREQUENCY_ARR['WORDS']);
    //$MODEL_CORE[$lang]['QURAN_TEXT'] = $QURAN_TEXT;
    addValueToMemoryModel($lang, "MODEL_CORE", "QURAN_TEXT", "", $QURAN_TEXT);
    //$MODEL_CORE[$lang]['RESOURCES']=$RESOURCES;
    addValueToMemoryModel($lang, "MODEL_CORE", "RESOURCES", "", $RESOURCES);
    //$MODEL_CORE[$lang]['STOP_WORDS']= $stopWordsArr;
    addValueToMemoryModel($lang, "MODEL_CORE", "STOP_WORDS", "", $stopWordsArr);
    //$MODEL_CORE[$lang]['STOP_WORDS_STRICT_L2']= $stopWordsStrictL2Arr;
    addValueToMemoryModel($lang, "MODEL_CORE", "STOP_WORDS_STRICT_L2", "", $stopWordsStrictL2Arr);
    //file_put_contents("$serializedModelFile.core", (json_encode($MODEL_CORE)));
    //$MODEL_SEARCH[$lang]['INVERTED_INDEX'] = $INVERTED_INDEX;
    /*$invertedIndexIterator = getAPCIterator("MODEL_SEARCH.*");
    			
    		foreach($invertedIndexIterator as $cursor)
    		{
    			preprint_r($cursor);
    		}*/
    addToMemoryModelBatch($invertedIndexBatchApcArr);
    //$res = apc_store("MODEL_CORE[$lang]",$MODEL_CORE[$lang]);
    //if ( $res===false){ throw new Exception("Can't cache MODEL_CORE[$lang]"); }
    //$res = apc_store("MODEL_SEARCH[$lang]",$MODEL_SEARCH[$lang]);
    //if ( $res===false){ throw new Exception("Can't cache MODEL_SEARCH[$lang]"); }
    //file_put_contents("$serializedModelFile.search", (json_encode($MODEL_SEARCH)));
    if ($lang == "AR") {
        //$MODEL_QAC['QAC_MASTERTABLE'] = $qacMasterSegmentTable;
        //$MODEL_QAC['QAC_POS'] = $qacPOSTable;
        addToMemoryModelBatch($qacPOSTableBatchApcArr);
        //$MODEL_QAC['QAC_FEATURES'] = $qacFeaturesTable;
        addToMemoryModelBatch($qacFeatureTableBatchApcArr);
        //$MODEL_QAC['QAC_ROOTS_LOOKUP'] = $rootsLookupArray;
        //file_put_contents("$serializedModelFile.qac", (json_encode($MODEL_QAC)));
        //$res = apc_store("MODEL_QAC",$MODEL_QAC);
        //if ( $res===false){ throw new Exception("Can't cache MODEL_QAC"); }
        rsortBy($quranaConcecpts, 'FREQ');
        $MODEL_QURANA['QURANA_CONCEPTS'] = $quranaConcecpts;
        $MODEL_QURANA['QURANA_PRONOUNS'] = $quranaResolvedPronouns;
        //file_put_contents("$serializedModelFile.qurana", (json_encode($MODEL_QURANA)));
        $res = apc_store("MODEL_QURANA", $MODEL_QURANA);
        if ($res === false) {
            throw new Exception("Can't cache MODEL_QURANA");
        }
    }
    //preprint_r($MODEL['INVERTED_INDEX'] );exit;
    //preprint_r($WORDS_FREQUENCY_ARR['VERSE_ENDINGS']);
    //echo serialize(json_encode($MODEL));
    //preprint_r($MODEL['EN']);
}

示例#4

显示文件

文件： extract-ontology.php 项目： AhmedAMohamed/qurananalysis

        $weight = 0;
        foreach ($biGramWords as $biGramTerm) {
            $weight += floatval($WORDS_FREQUENCY['WORDS_TFIDF'][$biGramTerm]['TFIDF']);
        }
        $weight = $weight / 2;
        //////
        //$weight = round($freq/$maxConceptFreq,2);
        $quranaConceptArr = getQuranaConceptEntryByARWord($biGramConcept);
        // ADD QURANA TRANSLATION FOR QURANA BIGRAMS
        $engTranslation = ucfirst($quranaConceptArr['EN']);
        addNewConcept($finalConcepts, $biGramConcept, "A-BOX", "PHRASE", $freq, $engTranslation);
        $finalConcepts[$biGramConcept]['EXTRA']['POS'] = $pos;
        $finalConcepts[$biGramConcept]['EXTRA']['WEIGHT'] = $weight;
        $finalConcepts[$biGramConcept]['EXTRA']['IS_QURANA_NGRAM_CONCEPT'] = true;
    }
    rsortBy($finalConcepts, "FREQ");
    echoN("FINAL CONCEPTS COUNT:" . count($finalConcepts));
    //preprint_r($finalConcepts);
    file_put_contents("{$ONTOLOGY_EXTRACTION_FOLDER}/temp.final.concepts.stage1", serialize($finalConcepts));
    file_put_contents("{$ONTOLOGY_EXTRACTION_FOLDER}/temp.all.terms", serialize($finalTerms));
}
if ($GENERATE_NONTAXONOMIC_RELATIONS) {
    $finalConcepts = unserialize(file_get_contents("{$ONTOLOGY_EXTRACTION_FOLDER}/temp.final.concepts.stage1"));
    //preprint_r($finalConcepts);exit;
    $MODEL_CORE_UTH = loadUthmaniDataModel();
    /* SURA'S LOOP **/
    for ($s = 0; $s < $numberOfSuras; $s++) {
        $suraSize = count($MODEL_CORE_UTH['QURAN_TEXT'][$s]);
        /* VERSES LOOP **/
        for ($a = 0; $a < $suraSize; $a++) {
            $i++;