# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# You can use Quran Analysis code, framework or corpora in your website
# or application (commercial/non-commercial) provided that you link
# back to www.qurananalysis.com and sufficient credits are given.
#
# ====================================================================

// Pull in the project settings file and the core library. Both paths are
// relative, so they resolve against the include path / working directory.
require_once "../global.settings.php";
require_once "../libs/core.lib.php";

// Load the "core" and "qac" models with the "AR" argument
// (presumably the Arabic language code -- confirm in core.lib.php).
loadModels("core,qac", "AR");

// NOTE(review): not referenced again in the visible part of this script.
$LEMMA_TO_SIMPLE_WORD_MAP = loadLemmaToSimpleMappingTable();

// Presumably emits the opening HTML of the page -- helper defined elsewhere.
printHTMLPageHeader();

// Cached per-word info, read back from a serialized file. Below it is indexed
// as $wordsInfoArr[<word>]['POS'].
// NOTE(review): neither file_get_contents() nor unserialize() is checked; if
// the cache file is missing or unreadable, $wordsInfoArr ends up as false.
$wordsInfoArr = unserialize(file_get_contents("../data/cache/words.info.all"));

// Stop-word list, one array element per line of the file. file() is called
// without flags, so each element still carries its line ending; that is why
// every entry is trim()med in the loop further down.
$stopWordsArr = file(dirname(__FILE__) . "/../data/merged-stoplist-files.2.sorted.unique.txt");

// Collect the entries tagged "V". getWordsByPos() appears to fill its first
// argument by reference (it is passed an empty array that is then iterated).
$verbsInfoArr = array();
getWordsByPos($verbsInfoArr, "V");
//preprint_r($verbsInfoArr);exit;

// Flatten the verb entries to a plain list of their SIMPLE_WORD values.
// The only visible consumer of $verbsArr is the commented-out array_merge below.
$verbsArr = array();
foreach ($verbsInfoArr as $word => $infoArr) {
    $simpleWord = $infoArr['SIMPLE_WORD'];
    $verbsArr[] = $simpleWord;
}
//preprint_r($verbsArr);exit;
//$stopWordsArr = array_merge($stopWordsArr,$verbsArr);

// Initialised empty here; it is not populated within the visible part of the script.
$stopWordsFromQuran = array();

// Frequency model entry; the loop below reads its 'WORDS' sub-array keyed by word.
$WORDS_FREQUENCY = getModelEntryFromMemory("AR", "MODEL_CORE", "WORDS_FREQUENCY", "");

// Walk the stop list and look only at entries that are keys of
// $WORDS_FREQUENCY['WORDS'].
foreach ($stopWordsArr as $key => $word) {
    // Strip the line ending (and any surrounding whitespace) left by file().
    $word = trim($word);
    if (isset($WORDS_FREQUENCY['WORDS'][$word])) {
        // Number of PoS entries recorded for this word in the cached info.
        // NOTE(review): assumes $wordsInfoArr[$word]['POS'] exists and is
        // countable -- there is no guard if the cache lacks this word.
        $countOfPosTagsForThisWord = count($wordsInfoArr[$word]['POS']);
        // the only PoS tag for this word is ADJ, N or Pn which can't be stop word
// Rebuild the per-word info cache: one getWordInfo() result for every key of
// $WORDS_FREQUENCY['WORDS_TFIDF'], stored under that same key.
foreach ($WORDS_FREQUENCY['WORDS_TFIDF'] as $wordLabel => $wordFreqArr) {
    $wordsInfoArr[$wordLabel] = getWordInfo($wordLabel, $MODEL_CORE, $MODEL_SEARCH, $MODEL_QAC, true);
}
// Persist the rebuilt array in serialized form.
// NOTE(review): the return value of file_put_contents() is not checked.
file_put_contents("../data/cache/words.info.all", serialize($wordsInfoArr));
} // closes a block whose opening statement lies before the visible part of this script

$finalConcepts = array();
$relationsArr = array();

// Term generation runs only when $GENERATE_TERMS loosely equals true (flag set
// outside the visible part of this script).
if ($GENERATE_TERMS == true) {
    // All three getWordsByPos() calls accumulate into the same array. The per-tag
    // figures printed below are the growth of count($finalTerms) across each call
    // (presumably the number of entries that call added -- confirm in getWordsByPos()).
    $finalTerms = array();

    getWordsByPos($finalTerms, "PN");
    echoN("PN:<b>" . count($finalTerms) . "</b>");
    $last = count($finalTerms);

    getWordsByPos($finalTerms, "ADJ");
    echoN("ADJ:<b>" . (count($finalTerms) - $last) . "</b>");
    $last = count($finalTerms);

    getWordsByPos($finalTerms, "N");
    echoN("N:<b>" . (count($finalTerms) - $last) . "</b>");

    // Leaves and immediately re-enters PHP mode; the single space between the two
    // tags is sent to the output as literal text.
    ?> <?php
    // Presumably sorts descending by each entry's "FREQ" field -- helper defined elsewhere.
    rsortBy($finalTerms, "FREQ");

    echoN("<hr>");
    echoN("Terms Count:<b>" . count($finalTerms) . "</b>");

    // Output mode is hard-coded; the alternative value is kept commented out.
    $PRESENTATION = "TABLE";
    //$PRESENTATION = "BLOCKS";

    if ($PRESENTATION == "TABLE") {
        // Opens the table; no matching close tag appears in the visible part.
        echo "<TABLE>";
    }

    // The cap is currently disabled, so the "Capped" figure printed below is the
    // same as the "Terms Count" printed above.
    //$finalTerms = array_slice($finalTerms, 0,70);
    echoN("Terms Count Capped:<b>" . count($finalTerms) . "</b>");