#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
#    You can use Quran Analysis code, framework or corpora in your website
#	 or application (commercial/non-commercial) provided that you link
#    back to www.qurananalysis.com and sufficient credits are given.
#
#  ====================================================================
require_once "../global.settings.php";
require_once "../libs/core.lib.php";
// Load the "core" and "qac" models for "AR", then fetch the table returned by
// loadLemmaToSimpleMappingTable(). The global name is kept as-is because
// library code may look it up by name (NOTE(review): confirm).
loadModels('core,qac', 'AR');
$LEMMA_TO_SIMPLE_WORD_MAP = loadLemmaToSimpleMappingTable();

// The page header is emitted before any data file is touched.
printHTMLPageHeader();

// Cached word-info table. This path is relative to the current working
// directory, unlike the stop list below which is anchored to this script.
$wordsInfoArr = unserialize(
    file_get_contents('../data/cache/words.info.all')
);

// Stop-word list, read line by line; entries keep their line endings here.
$stopWordsArr = file(
    __DIR__ . '/../data/merged-stoplist-files.2.sorted.unique.txt'
);
// Ask getWordsByPos() for everything tagged "V". The array is handed in empty
// and iterated right after, so the helper presumably fills it by reference
// (TODO confirm against its definition).
$verbsInfoArr = array();
getWordsByPos($verbsInfoArr, 'V');

// Reduce each verb record to its SIMPLE_WORD value.
$verbsArr = array();
foreach ($verbsInfoArr as $word => $infoArr) {
    $verbsArr[] = $simpleWord = $infoArr['SIMPLE_WORD'];
}

// Folding the verbs into the stop-word list is currently disabled:
//$stopWordsArr = array_merge($stopWordsArr,$verbsArr);

// Starts empty; the word-frequency entry is fetched for the lookups that follow.
$stopWordsFromQuran = array();
$WORDS_FREQUENCY = getModelEntryFromMemory('AR', 'MODEL_CORE', 'WORDS_FREQUENCY', '');
// Walk the stop-word list and examine every entry that is also a key of
// $WORDS_FREQUENCY['WORDS'].
foreach ($stopWordsArr as $key => $word) {
    // Entries were read with file(), so they still carry their line ending.
    $word = trim($word);
    if (isset($WORDS_FREQUENCY['WORDS'][$word])) {
        // Number of entries under 'POS' for this word in the cached word info.
        $countOfPosTagsForThisWord = count($wordsInfoArr[$word]['POS']);
        // A word whose only PoS tag is ADJ, N or PN cannot be a stop word.
        // NOTE(review): the check described above is not present in this section and
        // $countOfPosTagsForThisWord is never read here - confirm nothing was lost.
        // NOTE(review): by brace count the loop below sits inside the isset() branch
        // even though its indentation suggests otherwise - confirm the intended nesting.
    // Recompute the word-info entry for every label in $WORDS_FREQUENCY['WORDS_TFIDF'].
    // NOTE(review): $MODEL_CORE, $MODEL_SEARCH and $MODEL_QAC are not assigned in
    // this section; presumably they are set up by loadModels() - confirm.
    foreach ($WORDS_FREQUENCY['WORDS_TFIDF'] as $wordLabel => $wordFreqArr) {
        $wordsInfoArr[$wordLabel] = getWordInfo($wordLabel, $MODEL_CORE, $MODEL_SEARCH, $MODEL_QAC, true);
    }
    // Write the refreshed table back to the same cache file that is read at startup.
    file_put_contents("../data/cache/words.info.all", serialize($wordsInfoArr));
}
// Both arrays are initialised empty here; nothing in this section fills them.
$finalConcepts = array();
$relationsArr = array();
// Term generation runs only when $GENERATE_TERMS is truthy (loose == comparison).
// NOTE(review): $GENERATE_TERMS is not assigned in this section - presumably it
// comes from the settings include; confirm.
if ($GENERATE_TERMS == true) {
    $finalTerms = array();
    // Collect terms one POS tag at a time into the same array and print how many
    // each call contributed. The per-tag figures are computed as deltas, which
    // assumes getWordsByPos() only ever adds to $finalTerms (TODO confirm).
    getWordsByPos($finalTerms, "PN");
    echoN("PN:<b>" . count($finalTerms) . "</b>");
    // Running total before the next tag, used to derive that tag's delta.
    $last = count($finalTerms);
    getWordsByPos($finalTerms, "ADJ");
    echoN("ADJ:<b>" . (count($finalTerms) - $last) . "</b>");
    $last = count($finalTerms);
    getWordsByPos($finalTerms, "N");
    echoN("N:<b>" . (count($finalTerms) - $last) . "</b>");
    ?>
					
				
					<?php 
    // Order the collected terms by their "FREQ" field.
    // NOTE(review): rsortBy() is defined elsewhere; presumably descending - confirm.
    rsortBy($finalTerms, "FREQ");
    echoN("<hr>");
    echoN("Terms Count:<b>" . count($finalTerms) . "</b>");
    // Output layout switch: "TABLE" is active, the "BLOCKS" alternative is commented out.
    $PRESENTATION = "TABLE";
    //$PRESENTATION = "BLOCKS";
    if ($PRESENTATION == "TABLE") {
        // Opens the table; the matching close tag is not in this section.
        echo "<TABLE>";
    }
    // The cap below is disabled, so the "Capped" figure printed next is the same
    // as the full count printed above.
    //$finalTerms = array_slice($finalTerms, 0,70);
    echoN("Terms Count Capped:<b>" . count($finalTerms) . "</b>");