/**
 * Re-ranks candidate verses for a question and returns the best matching ones.
 *
 * Every entry of $scoringTable starts from its existing 'SCORE' and is boosted by:
 *  - the number of concepts found both in the question and in the verse text,
 *  - the number of verse concepts that are linked by the "is-a" relation
 *    ($is_a_relation_name_ar) to the concept of the detected question type,
 *  - for "EN": the number of distinct verse words that contain, or are contained
 *    in, a significant question word tagged VBN/VBD/VBG/NN/NNS,
 *  - for any other language: the number of roots shared between the question
 *    words tagged NN/NNS and the verse words.
 *
 * After sorting by score the top three verses are kept, and any of them that
 * shows none of the four similarity signals is removed.
 *
 * @param string $query                  Question text.
 * @param array  $queryWordsArr          Not used by this function; kept for existing callers.
 * @param array  $taggedSignificantWords Map of question word => POS tag.
 * @param array  $scoringTable           Map of document ID => array holding at least 'SURA', 'AYA' and 'SCORE'.
 * @param string $lang                   "EN" selects substring matching; any other value selects root matching.
 *
 * @return array array("ANSWER_CONCEPTS" => result of extendQueryWordsByConceptTaxRelations(),
 *               "ANSWER_VERSES" => up to three re-scored $scoringTable entries keyed by document ID)
 */
function answerUserQuestion($query, $queryWordsArr, $taggedSignificantWords, $scoringTable, $lang)
{
    global $is_a_relation_name_ar;

    // Weight applied to each kind of question/verse overlap.
    $COMMON_CONCEPTS_FACTOR = 10;
    $COMMON_QUESTION_TYPE_CONCEPTS_FACTOR = 10;
    $COMMON_ROOTS_FACTOR = 10;
    $COMMON_DERIVATIONS_FACTOR = 10;

    $isEnglish = ($lang == "EN");

    $QURAN_TEXT = getModelEntryFromMemory($lang, "MODEL_CORE", "QURAN_TEXT", "");

    // answering by relevance and similarity
    $conceptsFromTaxRelations = extendQueryWordsByConceptTaxRelations($taggedSignificantWords, $lang, true);

    $questionType = containsQuestionWords($query, $lang);

    ////////// COMMON CONCEPTS IN QUESTION
    // Only the concept names (array keys) are compared, so extract them once.
    $questionConceptNames = array_keys(getConceptsFoundInText($query, $lang));

    /////////// GET CONCEPTS FOR THE QUESTION TYPE
    /// GET INSTANCE CONCEPTS FROM QUESTION TYPE CLASS
    $questionType = cleanAndTrim(strtolower($questionType));
    $conceptID = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS_EN_AR_NAME_MAP", $questionType);
    $relationsOfConceptAsTarget = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "GRAPH_INDEX_TARGETS", $conceptID);

    $questionTypeConceptsArr = array();

    // The lookup may yield a non-array when the question type has no ontology entry.
    if (is_array($relationsOfConceptAsTarget)) {
        foreach ($relationsOfConceptAsTarget as $relArr) {
            if ($relArr["link_verb"] != $is_a_relation_name_ar) {
                continue;
            }

            $questionTypeConceptsArr[] = $relArr["source"];
        }
    }

    // The question side of the word comparison does not depend on the verse,
    // so it is prepared once instead of once per candidate verse.
    $derivationPosTags = array("VBN", "VBD", "VBG", "NN", "NNS");
    $questionDerivationWords = array();
    $questionWordsRootsArr = array();

    foreach ($taggedSignificantWords as $wordInQuestion => $pos) {
        // Purely numeric words arrive as integer array keys; strpos() needs strings.
        $wordInQuestion = (string) $wordInQuestion;

        // for words like i (NOUN in the lexicon for some reason)
        if (mb_strlen($wordInQuestion) <= 2) {
            continue;
        }

        if ($isEnglish) {
            if (in_array($pos, $derivationPosTags, true)) {
                $questionDerivationWords[] = $wordInQuestion;
            }
        } elseif ($pos == "NN" || $pos == "NNS") {
            $root = getRootOfSimpleWord($wordInQuestion, array("N", "V"));

            if (!empty($root)) {
                $questionWordsRootsArr[] = $root;
            }
        }
    }

    $scoredAnswerVersesArr = array();

    // Per-document similarity counters, consulted by the final filter.
    $similaritySignals = array();

    //// Answering by similarity and relevance
    foreach ($scoringTable as $documentID => $documentScoreArr) {
        $SURA = $documentScoreArr['SURA'];
        $AYA = $documentScoreArr['AYA'];
        $TEXT = $QURAN_TEXT[$SURA][$AYA];
        $score = $documentScoreArr['SCORE'];

        if ($isEnglish) {
            $TEXT = strtolower($TEXT);
        }

        $verseConceptNames = array_keys(getConceptsFoundInText($TEXT, $lang));

        /////////// COMMON CONCEPTS BETWEEN QUESTION AND A VERSE TEXT
        $commonQuestionVerseConceptsCount = getIntersectionCountOfTwoArrays($questionConceptNames, $verseConceptNames);
        $score += $commonQuestionVerseConceptsCount * $COMMON_CONCEPTS_FACTOR;

        /////////// CONCEPTS OF THE QUESTION TYPE FOUND IN THE VERSE
        $numberOfSharedConceptsForThisQuestionType = getIntersectionCountOfTwoArrays($questionTypeConceptsArr, $verseConceptNames);
        $score += $numberOfSharedConceptsForThisQuestionType * $COMMON_QUESTION_TYPE_CONCEPTS_FACTOR;

        //// QUESTION-VERSE SIMILARITY MEASUREMENT (WITH DERIVATIONS CONSIDERED)
        $wordsInVerseTextArr = explode(" ", $TEXT);
        $commonDerivations = 0;
        $verseWordsRootsArr = array();

        if ($isEnglish) {
            // Each distinct verse word is counted at most once, however many
            // question words it matches.
            $derivationHandledB4 = array();

            foreach ($wordsInVerseTextArr as $wordInArray) {
                $wordInArray = cleanAndTrim($wordInArray);

                if (mb_strlen($wordInArray) <= 2 || isset($derivationHandledB4[$wordInArray])) {
                    continue;
                }

                foreach ($questionDerivationWords as $wordInQuestion) {
                    // if any word (noun/verb) in the question is a substring, or vice versa
                    if (strpos($wordInArray, $wordInQuestion) !== false || strpos($wordInQuestion, $wordInArray) !== false) {
                        $commonDerivations++;
                        $derivationHandledB4[$wordInArray] = 1;
                        break;
                    }
                }
            }

            $score += $commonDerivations * $COMMON_DERIVATIONS_FACTOR;
        } else {
            foreach ($wordsInVerseTextArr as $wordInArray) {
                if (mb_strlen($wordInArray) <= 2) {
                    continue;
                }

                $root = getRootOfSimpleWord($wordInArray, array("N", "V"));

                if (!empty($root)) {
                    $verseWordsRootsArr[] = $root;
                }
            }
        }

        // Both lists are empty for "EN", so this contributes nothing there.
        $commonRootsCount = getIntersectionCountOfTwoArrays($verseWordsRootsArr, $questionWordsRootsArr);
        $score += $commonRootsCount * $COMMON_ROOTS_FACTOR;

        $similaritySignals[$documentID] = array(
            'COMMON_CONCEPTS' => $commonQuestionVerseConceptsCount,
            'COMMON_QUESTION_TYPE_CONCEPTS' => $numberOfSharedConceptsForThisQuestionType,
            'COMMON_DERIVATIONS' => $commonDerivations,
            'COMMON_ROOTS' => $commonRootsCount,
        );

        $documentScoreArr['SCORE'] = $score;
        $scoredAnswerVersesArr[$documentID] = $documentScoreArr;
    }

    rsortBy($scoredAnswerVersesArr, "SCORE");

    // preserve_keys keeps the document IDs even when they are integers; the
    // filter below looks the signals up by those IDs.
    $scoredAnswerVersesArr = array_slice($scoredAnswerVersesArr, 0, 3, true);

    //// REMOVE ANY VERSE FROM THE FINAL LIST WHICH HAS NO OBVIOUS SIMILARITY WITH THE QUESTION
    foreach (array_keys($scoredAnswerVersesArr) as $documentID) {
        $signals = isset($similaritySignals[$documentID]) ? $similaritySignals[$documentID] : array();

        if ($signals == array() || ($signals['COMMON_ROOTS'] == 0 && $signals['COMMON_CONCEPTS'] == 0 && $signals['COMMON_QUESTION_TYPE_CONCEPTS'] == 0 && $signals['COMMON_DERIVATIONS'] == 0)) {
            unset($scoredAnswerVersesArr[$documentID]);
        }
    }

    return array("ANSWER_CONCEPTS" => $conceptsFromTaxRelations, "ANSWER_VERSES" => $scoredAnswerVersesArr);
}
// Keep the query exactly as received before any later processing touches it.
$originalQuery = $query;

// 'script' request parameter; "simple" is used when it is missing or empty.
// Reading it through empty() avoids an undefined-index notice when absent.
$script = empty($_GET['script']) ? "simple" : $_GET['script'];

/// DETECT LANGUAGE
// LOCATION SIGNIFICANT: must run before loadModels() so the models are loaded
// for the detected language.
if (isArabicString($query)) {
    $lang = "AR";
    $direction = "rtl";
}

loadModels("core,search,qac,ontology", $lang);

$RESOURCES = getModelEntryFromMemory($lang, "MODEL_CORE", "RESOURCES", "");
$META_DATA = getModelEntryFromMemory($lang, "MODEL_CORE", "META_DATA", "");

$significantWords = array();

// Search-mode flags, all switched off here.
$isPhraseSearch = false;
$isQuestion = false;
$isColumnSearch = false;
$columnSearchType = null;
$columnSearchKeyValParams = null;
$noDerivationsConstraint = false;
$noOntologyExtentionConstraint = false;
$isConceptSearch = false;
$isTransliterationSearch = false;

// 1 when the query contains a double-quoted segment, otherwise 0.
$matchesCount = preg_match("/\".*?\"/", $query);
/**
 * Looks up the root stored in the QAC roots table for an Arabic word.
 *
 * The word is first passed through getItemFromUthmaniToSimpleMappingTable()
 * and the result is used as the key into MODEL_QAC / QAC_ROOTS_LOOKUP.
 * NOTE(review): the mapped value is presumably the Uthmani spelling of the
 * simple-script word - confirm against the mapping helper.
 *
 * @param string $wordSimple         Word to look up.
 * @param array  $expectedPosTagsArr Currently ignored; kept so existing callers keep working.
 *
 * @return mixed Whatever getModelEntryFromMemory() yields for the key; callers
 *               in this file treat an empty() result as "no root found".
 */
function getRootOfSimpleWord($wordSimple, $expectedPosTagsArr)
{
    $wordUthmani = getItemFromUthmaniToSimpleMappingTable($wordSimple);

    return getModelEntryFromMemory("AR", "MODEL_QAC", "QAC_ROOTS_LOOKUP", $wordUthmani);
}
<?php require "./analysis.template.start.code.php"; ?> <div id='main-container'> <?php include_once "help-content.php"; ?> <?php $TOTALS = getModelEntryFromMemory($lang, "MODEL_CORE", "TOTALS", ""); $RESOURCES = getModelEntryFromMemory($lang, "MODEL_CORE", "RESOURCES", ""); ?> <div > <div id="graph-verse-or-text-selection" > <a href='javascript:openFullGraph()'>Open Full Quran Graph</a> <br/> <br/> <div style="direction:<?php echo $lang == "AR" ? "rtl" : "ltr"; ?> "> <?php
<!-- WORDCLOUD AREA --> <div id="result-wordcloud-area"> <div id='result-wordcloud-title' <?php echo returnDirectionStyle($lang); ?> > <?php echo $RESOURCES['INDEX_WORDCLOUD_TITLE']; ?> </div> <div id='result-wordcloud-content'> <?php $wordCloudArr = searchResultsToWordcloud($searchResultsTextArr, $lang, 50); //shuffle_assoc($wordCloudArr); //preprint_r($wordCloudArr,1); $STOP_WORDS = getModelEntryFromMemory($lang, "MODEL_CORE", "STOP_WORDS", ""); //$STOP_WORDS_STRICT_L2 = getModelEntryFromMemory($lang, "MODEL_CORE", "STOP_WORDS_STRICT_L2", ""); $i = 0; foreach ($wordCloudArr as $wordLabel => $wordFreq) { if (isset($originalQueryWordsArrSwaped[$wordLabel])) { continue; } if ($lang == "AR") { if (isset($STOP_WORDS[$wordLabel])) { continue; } } else { if (isset($STOP_WORDS[$wordLabel])) { continue; } }
} if ($lang == "EN") { echoN("Pause Marks are supported only for Arabic, you chose English !"); exit; } loadModels("core", $lang); $pauseMark = $_GET['mark']; // nothing passed if ($pauseMark == "") { exit; } $markedVerses = array(); $unrepeatedWords = array(); $QURAN_TEXT = getModelEntryFromMemory($lang, "MODEL_CORE", "QURAN_TEXT", ""); $META_DATA = getModelEntryFromMemory($lang, "MODEL_CORE", "META_DATA", ""); $TOTALS = getModelEntryFromMemory($lang, "MODEL_CORE", "TOTALS", ""); $i = 0; /* SURA'S LOOP **/ for ($s = 0; $s < $numberOfSuras; $s++) { $suraSize = count($QURAN_TEXT[$s]); /* VERSES LOOP **/ for ($a = 0; $a < $suraSize; $a++) { $i++; $verseText = $QURAN_TEXT[$s][$a]; if (mb_strpos($verseText, $pauseMark) !== false) { $suraName = $META_DATA['SURAS'][$s]['name_' . strtolower($lang)]; $verseLocation = $s + 1 . ":" . ($a + 1); // done in last preg replace if (isset($markedVerses[$verseLocation])) { continue; }
$LEMMA_TO_SIMPLE_WORD_MAP = loadLemmaToSimpleMappingTable(); printHTMLPageHeader(); $wordsInfoArr = unserialize(file_get_contents("../data/cache/words.info.all")); $stopWordsArr = file(dirname(__FILE__) . "/../data/merged-stoplist-files.2.sorted.unique.txt"); $verbsInfoArr = array(); getWordsByPos($verbsInfoArr, "V"); //preprint_r($verbsInfoArr);exit; $verbsArr = array(); foreach ($verbsInfoArr as $word => $infoArr) { $simpleWord = $infoArr['SIMPLE_WORD']; $verbsArr[] = $simpleWord; } //preprint_r($verbsArr);exit; //$stopWordsArr = array_merge($stopWordsArr,$verbsArr); $stopWordsFromQuran = array(); $WORDS_FREQUENCY = getModelEntryFromMemory("AR", "MODEL_CORE", "WORDS_FREQUENCY", ""); foreach ($stopWordsArr as $key => $word) { $word = trim($word); if (isset($WORDS_FREQUENCY['WORDS'][$word])) { $countOfPosTagsForThisWord = count($wordsInfoArr[$word]['POS']); // the only PoS tag for this word is ADJ, N or Pn which can't be stop word if (isset($wordsInfoArr[$word]['POS']['ADJ']) || isset($wordsInfoArr[$word]['POS']['N']) || isset($wordsInfoArr[$word]['POS']['PN'])) { if ($countOfPosTagsForThisWord > 1 && isset($wordsInfoArr[$word]['POS']['DET']) || $countOfPosTagsForThisWord == 1) { //preprint_r($wordsInfoArr[$word]['POS']); //echoN($word); continue; } } echoN($word); $stopWordsFromQuran[$word] = 1; }
addRelation($relationsArr, $type, $subclassConcept, $hasType, $parentConcept, "{$pos}", "{$is_a_relation_name_en}"); } } echoN("TAXONOMIC RELATIONS - BIGRAM PARENT :" . (count($relationsArr) - $countOfRelationsBefore)); /////////////////////////////////////////////////////////////////// echoN("FINAL TAXONOMIC RELATIONS :" . (count($relationsArr) - $countOfRelationsFirst)); echoN("BA-A:" . count($finalConcepts)); //preprint_r($finalConcepts);exit; file_put_contents("{$ONTOLOGY_EXTRACTION_FOLDER}/temp.final.concepts.stage3", serialize($finalConcepts)); file_put_contents("{$ONTOLOGY_EXTRACTION_FOLDER}/temp.final.relations", serialize($relationsArr)); } if ($ENRICH_CONCEPTS_METADATA_TRANSLATION_TRANSLITERATION) { $finalConcepts = unserialize(file_get_contents("{$ONTOLOGY_EXTRACTION_FOLDER}/temp.final.concepts.stage3")); foreach ($finalConcepts as $concept => $coneptArr) { $currentEnglishTranslation = $coneptArr['EXTRA']['TRANSLATION_EN']; $uthmaniWord = getModelEntryFromMemory("AR", "OTHERS", "UTHMANI_TO_SIMPLE_WORD_MAP", $concept); //echoN("/$concept/"); if (empty($currentEnglishTranslation)) { $finalTranslation = null; //echoN($uthmaniWord); $finalTranslation = $WORDS_TRANSLATIONS_AR_EN[$uthmaniWord]; //echoN($finalTranslation); // WORD TRANSLATION NOT FOUND - TRY AGAIN WITH DETERMINDER 'ALEF+LAM' if (empty($finalTranslation)) { /* * REMOVED TRANSLATION BY NEAREST DERIVATION BECAUSE IT CHANGES THE MEANING * زوج = kind * */ /*if ( startsWithAL($concept)) {
} </style> </head> <body> <div id='main-container'> <div id='graph-maingraph-area'> <?php $customFreqArr = array(); $QURAN_TEXT = getModelEntryFromMemory($lang, "MODEL_CORE", "QURAN_TEXT", ""); $suraSize = count($QURAN_TEXT[$SURA]); for ($a = 0; $a < $suraSize; $a++) { $verseText = $QURAN_TEXT[$SURA][$a]; $verseTextArr = explode(" ", $verseText); foreach ($verseTextArr as $index => $word) { $word = cleanAndTrim($word); $word = strtolower($word); $customFreqArr[$word]++; } $arrOfTextToGraph[] = $verseText; } $graphObj = ontologyTextToD3Graph($MODEL_QA_ONTOLOGY, "SEARCH_RESULTS_TEXT_ARRAY", $arrOfTextToGraph, 0, array(960, 600), $lang, 1); foreach ($graphObj['nodes'] as $index => $nodeArr) { $word = strtolower($nodeArr['word']); if (isset($customFreqArr[$word])) {
// will be removed by label showExcludeFor("longdesc", $longDescAR, 2, $labelAr); } $sIndex = 1; if (isset($conceptToBeCleaned['synonym_' . $sIndex])) { echoN("SYNONYM"); while (isset($conceptToBeCleaned['synonym_' . $sIndex])) { $synonym = $conceptToBeCleaned['synonym_' . $sIndex]; if (!empty($synonym)) { showExcludeFor("synonyms", $synonym, $sIndex); } $sIndex++; } } echoN("RELATIONS"); $qaRelationsArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "RELATIONS", ""); $relIndex = 0; foreach ($qaRelationsArr as $hash => $relArr) { $subject = $relArr['SUBJECT']; $object = $relArr['OBJECT']; $verb = $relArr['VERB']; $relationStr = "{$subject},{$verb},{$object}"; //excluded before if (in_array($relationStr, $excludedRelationsArr)) { continue; } if ($subject != $labelAr && $object != $labelAr) { continue; } $relIndex++; $text = "{$subject},{$verb},{$object}";
/**
 * Recursively builds the child nodes of a concept for a tree visualisation.
 *
 * A concept becomes a child when it is the source of an "is-a" relation
 * ($is_a_relation_name_ar) whose target is $conceptNameID and its 'frequency'
 * is at least $minFreq. Concepts listed in $alreadyInLevel1 are emitted
 * without descending into their own children.
 *
 * @param mixed  $MODEL_QA_ONTOLOGY Not read here; only handed on to the recursive call.
 * @param string $conceptNameID     Concept whose children are wanted.
 * @param int    $minFreq           Minimum concept frequency for a child to be included.
 * @param string $lang              "EN" labels nodes with 'label_en', anything else with 'label_ar'.
 * @param int    $level             Current depth; nothing is expanded once it is above 5.
 * @param array  $alreadyInLevel1   Map keyed by concept name of concepts not to be expanded.
 *
 * @return array List of array("name" => ..., "size" => ..., "children" => ...);
 *               empty when the depth limit is hit or no child qualifies.
 */
function getTreeNodeChildren($MODEL_QA_ONTOLOGY, $conceptNameID, $minFreq, $lang, $level, $alreadyInLevel1)
{
    global $is_a_relation_name_ar;

    $childrenArr = array();

    // Depth limit. Returns an empty list (previously null) so "children" is
    // always an array, like on every other path.
    if ($level++ > 5) {
        return $childrenArr;
    }

    // Relations in which this concept is the target, i.e. its potential children.
    $relationsOfConceptAsTarget = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "GRAPH_INDEX_TARGETS", $conceptNameID);

    // Nothing to iterate when the concept has no entry in the targets index.
    if (!is_array($relationsOfConceptAsTarget)) {
        return $childrenArr;
    }

    foreach ($relationsOfConceptAsTarget as $relArr) {
        if ($relArr["link_verb"] != $is_a_relation_name_ar) {
            continue;
        }

        $subject = $relArr["source"];

        $conceptArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $subject);
        $conceptFrequency = $conceptArr['frequency'];

        if ($conceptFrequency < $minFreq) {
            continue;
        }

        $conceptLabel = ($lang == "EN") ? $conceptArr['label_en'] : $conceptArr['label_ar'];
        $conceptNameClean = convertConceptIDtoGraphLabel($conceptLabel);

        $children = array();

        if (!isset($alreadyInLevel1[$subject])) {
            $children = getTreeNodeChildren($MODEL_QA_ONTOLOGY, $subject, $minFreq, $lang, $level, $alreadyInLevel1);
        }

        $childrenArr[] = array("name" => $conceptNameClean, "size" => $conceptFrequency, "children" => $children);
    }

    return $childrenArr;
}
function loadModels($modelsToBeLoaded, $lang) { global $modelSources, $serializedModelFile, $quranMetaDataFile, $META_DATA, $MODEL_CORE, $MODEL_SEARCH, $MODEL_QAC, $MODEL_QURANA; global $UTHMANI_TO_SIMPLE_WORD_MAP, $numberOfSuras, $pauseMarksFile; global $TRANSLATION_MAP_EN_TO_AR, $TRANSLATION_MAP_AR_TO_EN, $TRANSLITERATION_WORDS_MAP, $TRANSLITERATION_VERSES_MAP; global $wordByWordTranslationFile, $transliterationFile; global $MODEL_WORDNET, $qaOntologyNamespace, $qaOntologyFile, $is_a_relation_name_ar, $is_a_relation_name_en; global $thing_class_name_ar, $thing_class_name_en; global $MODEL_QA_ONTOLOGY, $arabicStopWordsFileL2; global $TRANSLITERATION_WORDS_LOCATION_MAP, $TRANSLITERATION_WORDS_INDEX; //not working gc_enable(); if (!function_exists("apc_exists")) { throw new Exception("APC not found!"); } //echoN("MODEL EXISTS IN CACHE?:".apc_exists("EN/MODEL_CORE/TOTALS/")); ##### CHECK MODEL IN CACHE ##### ##### if (TRUE && apc_exists("EN/MODEL_CORE/TOTALS/") !== false) { // split list by comma $modelListArr = preg_split("/,/", trim($modelsToBeLoaded)); /** * TODO: CHANGE THE CODE TO REFERENCE APC MEMORY DIRECTLY INSTEAD OF LOADING DATA IN EACH SCRIPT */ foreach ($modelListArr as $modelName) { //echoN("$modelName $lang ".time()); // echoN(memory_get_peak_usage()); //echoN($modelName); if ($modelName == "ontology") { /*$MODEL_QA_ONTOLOGY = apc_fetch("MODEL_QA_ONTOLOGY"); if ($MODEL_QA_ONTOLOGY===false ) { echo "$MODEL_QA_ONTOLOGY NOT CACHED";exit; } */ } if ($modelName == "wordnet") { } if ($modelName == "core") { //$MODEL_CORE = json_decode((file_get_contents("$serializedModelFile.core")),TRUE); /*$MODEL_CORE = apc_fetch("MODEL_CORE[$lang]"); if ($MODEL_CORE===false ) { echo "CORE MODEL [$lang] NOT CACHED";exit; }*/ } else { if ($modelName == "search") { //$MODEL_SEARCH = json_decode((file_get_contents("$serializedModelFile.search")),TRUE); //$MODEL_SEARCH = apc_fetch("MODEL_SEARCH[$lang]"); /*if ($MODEL_SEARCH===false ) { echo "SEARCH MODEL [$lang] NOT 
CACHED";exit; }*/ } else { if ($modelName == "qac") { //$MODEL_QAC = json_decode((file_get_contents("$serializedModelFile.qac")),TRUE); /*$MODEL_QAC = apc_fetch("MODEL_QAC"); if ($MODEL_QAC===false ) { echo "QAC MODEL NOT CACHED";exit; } */ } } } } $MODEL_WORDNET['INDEX'] = apc_fetch("WORDNET_INDEX"); if ($MODEL_WORDNET['INDEX'] === false) { echo "MODEL_WORDNET['INDEX'] NOT CACHED"; exit; } $MODEL_WORDNET['LEXICO_SEMANTIC_CATEGORIES'] = apc_fetch("WORDNET_LEXICO_SEMANTIC_CATEGORIES"); if ($MODEL_WORDNET['LEXICO_SEMANTIC_CATEGORIES'] === false) { echo " MODEL MODEL_WORDNET['LEXICO_SEMANTIC_CATEGORIES'] NOT CACHED"; exit; } $MODEL_WORDNET['DATA'] = apc_fetch("WORDNET_DATA"); if ($MODEL_WORDNET['DATA'] === false) { echo "MODEL MODEL_WORDNET['DATA'] NOT CACHED"; exit; } //else if ( ($modelName=="qurana")) //{ //$MODEL_QURANA = json_decode((file_get_contents("$serializedModelFile.qurana")),TRUE); $MODEL_QURANA = apc_fetch("MODEL_QURANA"); if ($MODEL_QURANA === false) { echo "QURANA MODEL NOT CACHED"; exit; } //} return; } ########## ##### ##### ##### ##### ##### //$quran = file($quranMetaDataFile); $quranMetaDataXMLObj = simplexml_load_file($quranMetaDataFile); ###### CONVERT META XML STRUCUTURE TO OUR STRUCTURE foreach ($quranMetaDataXMLObj->suras as $index => $surasArr) { foreach ($surasArr->sura as $suraMetaArr) { $tempArr = array(); $tempArr['index'] = (string) $suraMetaArr['index']; $tempArr['ayas'] = (string) $suraMetaArr['ayas']; $tempArr['name_ar'] = (string) $suraMetaArr['name']; $tempArr['name_trans'] = (string) $suraMetaArr['tname']; $tempArr['name_en'] = (string) $suraMetaArr['ename']; $tempArr['type'] = (string) $suraMetaArr['type']; $tempArr['order'] = (string) $suraMetaArr['order']; $META_DATA['SURAS'][] = $tempArr; } } ############################################## /////////// LOAD ONTOLOGY $reader = new OWLReader(); $ontology = new OWLMemoryOntology(); $thingClassName = "{$thing_class_name_ar}"; $ontology->setNamespace($qaOntologyNamespace); 
$reader->readFromFile($qaOntologyFile, $ontology); //preprint_r($ontology->{'owl_data'}['classes']); //preprint_r($ontology->{'owl_data'}['properties']); //preprint_r($ontology->{'owl_data'}['labels']); //preprint_r($ontology->{'owl_data'}['annotations']); //preprint_r($ontology->{'owl_data'}['instances']); $classes = $ontology->{'owl_data'}['classes']; $instances = $ontology->{'owl_data'}['instances']; $qaOntologyConceptsArr = array(); $qaOntologyRelationsArr = array(); $relationsCount = 0; foreach ($classes as $className => $infoArr) { $className = stripOntologyNamespace($className); $qaOntologyConceptsArr[$className] = array("type" => "class"); //echoN($className); //preprint_r($infoArr); foreach ($infoArr[0]['properties'] as $index => $propertiesArr) { /** INCASE THIS INSTANCE HAS MULTIPLE PROPERTIES WITH SAME VERB **/ foreach ($propertiesArr as $index2 => $onePropertyArr) { if (empty($onePropertyArr)) { continue; } $verb = key($onePropertyArr); $objectClassArr = current($onePropertyArr); $objectConceptName = stripOntologyNamespace($objectClassArr[0]); //echoN("CLASS:***** $className => $verb -> $objectConceptName"); $attributedArr = next($onePropertyArr); $freq = $attributedArr['frequency']; $engTranslation = $attributedArr['verb_translation_en']; $verbUthmani = $attributedArr['verb_uthmani']; $relHashID = buildRelationHashID($className, $verb, $objectConceptName); $qaOntologyRelationsArr[$relHashID] = array("SUBJECT" => $className, "VERB" => $verb, "OBJECT" => $objectConceptName, "FREQUENCY" => $freq, "VERB_TRANSLATION_EN" => $engTranslation, "VERB_UTHMANI" => $verbUthmani); //preprint_r($qaOntologyRelationsArr[$relHashID]); $relationsCount++; } } } foreach ($instances as $instanceName => $intancesArr) { foreach ($intancesArr as $index => $infoArr) { $subjectConceptName = stripOntologyNamespace($instanceName); $parent = stripOntologyNamespace($infoArr['class']); //echoN("$subjectConceptName $parent"); $relHashID = buildRelationHashID($subjectConceptName, 
$is_a_relation_name_ar, $parent); $qaOntologyRelationsArr[$relHashID] = array("SUBJECT" => $subjectConceptName, "VERB" => "{$is_a_relation_name_ar}", "OBJECT" => $parent, "VERB_TRANSLATION_EN" => "{$is_a_relation_name_en}"); if ($parent != $thing_class_name_ar) { $relationsCount++; } $propertiesArr = $infoArr['properties']; //echoN($instanceName); //echoN("$instanceName:@@@"); //preprint_r($propertiesArr); /** INCASE THIS INSTANCE HAS MULTIPLE PROPERTIES WITH SAME VERB **/ foreach ($propertiesArr as $index2 => $onePropertyArr) { if (empty($onePropertyArr)) { continue; } $verb = key($onePropertyArr); $objectClassArr = current($onePropertyArr); $objectConceptName = stripOntologyNamespace($objectClassArr[0]); //echoN("***** $verb -> $objectConceptName"); $attributedArr = next($onePropertyArr); $freq = $attributedArr['frequency']; $engTranslation = $attributedArr['verb_translation_en']; $verbUthmani = $attributedArr['verb_uthmani']; $relHashID = buildRelationHashID($subjectConceptName, $verb, $objectConceptName); $qaOntologyRelationsArr[$relHashID] = array("SUBJECT" => $subjectConceptName, "VERB" => $verb, "OBJECT" => $objectConceptName, "FREQUENCY" => $freq, "VERB_TRANSLATION_EN" => $engTranslation, "VERB_UTHMANI" => $verbUthmani); $relationsCount++; } // if it is class dont make it instance even if it is a subject (subclass of another class // BUG: animal was not apearing on ontology graph page since it was instance if (empty($qaOntologyConceptsArr[$subjectConceptName]) || $qaOntologyConceptsArr[$subjectConceptName][type] != 'class') { $qaOntologyConceptsArr[$subjectConceptName] = array("type" => "instance"); } } } foreach ($qaOntologyConceptsArr as $conceptName => $infoArr) { $fullConceptName = $qaOntologyNamespace . 
$conceptName; $labelsArr = $ontology->{'owl_data'}['labels'][$fullConceptName]; foreach ($labelsArr as $labelLang => $label) { /*if ( mb_strlen($label)==1) { echon($fullConceptName); preprint_r($ontology->{'owl_data'}['labels'][$fullConceptName]); }*/ $qaOntologyConceptsArr[$conceptName]['label_' . strtolower($labelLang)] = $label; } // "Thing" does not have annotations if (isset($ontology->{'owl_data'}['annotations'][$fullConceptName])) { $annotationsArr = $ontology->{'owl_data'}['annotations'][$fullConceptName]; foreach ($annotationsArr as $index => $annotArr) { $key = $annotArr['KEY']; $val = $annotArr['VAL']; $qaOntologyConceptsArr[$conceptName][$key] = $val; //echoN("[$conceptName][$key] = $val"); } } } ////////// OUTPUT STATS /*echoN("INSTANCES COUNT:".count($ontology->{'owl_data'}['instances'])); echoN("CLASSES COUNT:".count($ontology->{'owl_data'}['classes'])); echoN("PROPERTIES COUNT - DECLERATIONS ONLY:".count($ontology->{'owl_data'}['properties']));; echoN("CONCEPTS COUNT:".count($qaOntologyConceptsArr)); echoN("RELATIONS COUNT:".$relationsCount); preprint_r($qaOntologyRelationsArr);*/ ////////////////// ///////////// QUALITY CHECK CONCEPTS $qaOntologyConceptsArr2 = array(); foreach ($qaOntologyConceptsArr as $key => $val) { $newKey = strtr($key, "_", " "); $qaOntologyConceptsArr2[$newKey] = $value; } $ONTOLOGY_EXTRACTION_FOLDER = "../data/ontology/extraction/"; $finalConcepts = unserialize(file_get_contents("{$ONTOLOGY_EXTRACTION_FOLDER}/temp.final.concepts.final")); $diffArr = array_diff(array_keys($qaOntologyConceptsArr2), array_keys($finalConcepts)); $conceptsDiffCount = count($matchingTable); if ($relationsDiffCount > 0) { echoN("<b>### OWL-PROPRIETARY-CONCEPTS-DIFF-COUNT:</b>" . 
$conceptsDiffCount); } //preprint_r($diffArr); ////////////////////////////////////////////////////////////// //////// quality check relations $relationsArr = unserialize(file_get_contents("{$ONTOLOGY_EXTRACTION_FOLDER}/temp.final.relations")); $matchingTable = array(); foreach ($qaOntologyRelationsArr as $index => $relArr) { $trippleStr = $relArr['SUBJECT'] . "->" . $relArr['VERB'] . "->" . $relArr['OBJECT']; //since Thing relations are not in the list we are comparing with if ($relArr['OBJECT'] == $thing_class_name_ar) { continue; } //echoN($trippleStr); $trippleStr = trim($trippleStr); $matchingTable[$trippleStr]++; } foreach ($relationsArr as $index => $relArr) { $relArr['SUBJECT'] = strtr($relArr['SUBJECT'], " ", "_"); $relArr['VERB'] = strtr($relArr['VERB'], " ", "_"); $relArr['OBJECT'] = strtr($relArr['OBJECT'], " ", "_"); $trippleStr = $relArr['SUBJECT'] . "->" . $relArr['VERB'] . "->" . $relArr['OBJECT']; $trippleStr = trim($trippleStr); $matchingTable[$trippleStr]++; } function filterFunc($v) { return $v <= 1; } $matchingTable = array_filter($matchingTable, 'filterFunc'); $relationsDiffCount = count($matchingTable); if ($relationsDiffCount > 0) { echoN("<b>### OWL-PROPRIETARY-RELATIONS-DIFF-COUNT:</b>" . 
$relationsDiffCount); preprint_r($matchingTable); } ////////////////////////////////////////////// //echoN( join("<br>",array_keys($qaOntologyConceptsArr))); $qaOntologyVerbIndex = array(); $qaOntologyGraphSourcesIndex = array(); $qaOntologyGraphTargetsIndex = array(); //preprint_r($qaOntologyRelationsArr); //exit; foreach ($qaOntologyRelationsArr as $index => $relArr) { $subject = $relArr['SUBJECT']; $verb = $relArr['VERB']; $verb_translation_en = $relArr['VERB_TRANSLATION_EN']; $object = $relArr['OBJECT']; //$qaOntologyVerbIndex[$verb][]=array("SUBJECT"=>$subject,"OBJECT"=>$object); //$qaOntologyVerbIndex[$verb_translation_en][]=array("SUBJECT"=>$subject,"OBJECT"=>$object); addValueToMemoryModel("ALL", "MODEL_QA_ONTOLOGY", "VERB_INDEX", $verb, array("SUBJECT" => $subject, "OBJECT" => $object)); addValueToMemoryModel("ALL", "MODEL_QA_ONTOLOGY", "VERB_INDEX", $verb_translation_en, array("SUBJECT" => $subject, "OBJECT" => $object)); //$qaOntologyGraphSourcesIndex[$subject][]=array("link_verb"=>$verb,"target"=>$object,"relation_index"=>$index); //$qaOntologyGraphTargetsIndex[$object][]=array("source"=>$subject,"link_verb"=>$verb,"relation_index"=>$index); addToMemoryModelList("ALL", "MODEL_QA_ONTOLOGY", "GRAPH_INDEX_SOURCES", $subject, array("link_verb" => $verb, "target" => $object, "relation_index" => $index)); addToMemoryModelList("ALL", "MODEL_QA_ONTOLOGY", "GRAPH_INDEX_TARGETS", $object, array("source" => $subject, "link_verb" => $verb, "relation_index" => $index)); } $qaOntologyConceptsENtoARMapArr = array(); foreach ($qaOntologyConceptsArr as $arName => $conceptArr) { $enLabel = trim(strtolower($conceptArr['label_en'])); //$qaOntologyConceptsENtoARMapArr[$enLabel]=$arName; //$qaOntologyConceptsENtoARMapArr[$enLabel]=$arName; addValueToMemoryModel("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS_EN_AR_NAME_MAP", $enLabel, $arName); } $qaSynonymsIndex = array(); foreach ($qaOntologyConceptsArr as $arName => $conceptArr) { addValueToMemoryModel("ALL", "MODEL_QA_ONTOLOGY", 
"CONCEPTS", $arName, $conceptArr); $i = 1; while (isset($conceptArr['synonym_' . $i])) { if (empty($conceptArr['synonym_' . $i])) { $i++; continue; } $synonymLabel = trim(strtolower($conceptArr['synonym_' . $i])); $qaSynonymsIndex[$synonymLabel] = $arName; addValueToMemoryModel("ALL", "MODEL_QA_ONTOLOGY", "SYNONYMS_INDEX", $synonymLabel, $arName); $i++; } } //preprint_r($qaOntologyConceptsArr);exit; //$MODEL_QA_ONTOLOGY['CONCEPTS'] = $qaOntologyConceptsArr; //$MODEL_QA_ONTOLOGY['RELATIONS'] = $qaOntologyRelationsArr; addValueToMemoryModel("ALL", "MODEL_QA_ONTOLOGY", "RELATIONS", "", $qaOntologyRelationsArr); //$MODEL_QA_ONTOLOGY['GRAPH_INDEX_SOURCES'] = $qaOntologyGraphSourcesIndex; //$MODEL_QA_ONTOLOGY['GRAPH_INDEX_TARGETS'] = $qaOntologyGraphTargetsIndex; //$MODEL_QA_ONTOLOGY['CONCEPTS_EN_AR_NAME_MAP'] = $qaOntologyConceptsENtoARMapArr; //$MODEL_QA_ONTOLOGY['VERB_INDEX'] = $qaOntologyVerbIndex; //$MODEL_QA_ONTOLOGY['SYNONYMS_INDEX'] = $qaSynonymsIndex; //$res = apc_store("MODEL_QA_ONTOLOGY",$MODEL_QA_ONTOLOGY); //if ( $res===false){ throw new Exception("Can't cache MODEL_QA_ONTOLOGY"); } //preprint_r($MODEL_QA_ONTOLOGY);exit; //////// END ONTOLOGY LOADING //////////////////////////// /// WORDNET loadWordnet($MODEL_WORDNET); ///////////// //free resources $quranMetaDataXMLObj = null; unset($quranMetaDataXMLObj); foreach ($modelSources as $supportedLang => $modelSourceArr) { $type = $modelSourceArr['type']; $file = $modelSourceArr['file']; //echoN("$lang $type $file"); loadModel($supportedLang, $type, $file); //not working $gced = gc_collect_cycles(); //echoN($gced); } //echoN(json_encode($MODEL)); ############ Uthmani/Simple mapping table ################# ############ AND WORD-WORD TRANSLATION AND TRANSLITERATION ################# $pauseMarksArr = getPauseMarksArrByFile($pauseMarksFile); $wordByWordFileArr = file($wordByWordTranslationFile, FILE_SKIP_EMPTY_LINES | FILE_IGNORE_NEW_LINES); $translitertationArr = file($transliterationFile, FILE_SKIP_EMPTY_LINES | 
FILE_IGNORE_NEW_LINES); $WORD_SENSES_EN = array(); $WORD_SENSES_AR = array(); $quranTextEntryFromAPC_AR = getModelEntryFromMemory("AR", "MODEL_CORE", "QURAN_TEXT", ""); $quranTextEntryFromAPC_UTH = getModelEntryFromMemory("AR_UTH", "MODEL_CORE", "QURAN_TEXT", ""); /* SURA'S LOOP **/ for ($s = 0; $s < $numberOfSuras; $s++) { $suraSize = count($quranTextEntryFromAPC_AR[$s]); /* VERSES LOOP **/ for ($a = 0; $a < $suraSize; $a++) { $i++; $verseTextSimple = $quranTextEntryFromAPC_AR[$s][$a]; $simpleWordsArr = preg_split("/ /", $verseTextSimple); $verseTextUthmani = $quranTextEntryFromAPC_UTH[$s][$a]; $uthmaniWordsArr = preg_split("/ /", $verseTextUthmani); $simpleWordsArr = removePauseMarksFromArr($pauseMarksArr, $simpleWordsArr); $uthmaniWordsArr = removePauseMarksFromArr($pauseMarksArr, $uthmaniWordsArr); $verseLocation = $s + 1 . ":" . ($a + 1); $UTHMANI_TO_SIMPLE_LOCATION_MAP[$verseLocation] = array(); ///////// Transliteration ///////////// $transliterationLine = current($translitertationArr); next($translitertationArr); $lineParts = preg_split("/\\|/", $transliterationLine); $verseTransliteration = $lineParts[2]; //echoN($transliterationLine); $TRANSLITERATION_VERSES_MAP[$verseLocation] = $verseTransliteration; $wordsTransliterationArr = preg_split("/ /", $verseTransliteration); // preprint_r($wordsTransliterationArr);exit; ///////////////////////////////////////////////// $wtwIndex = 0; foreach ($uthmaniWordsArr as $index => $wordUthmani) { $qacMasterID = $s + 1 . ":" . ($a + 1) . ":" . ($index + 1); $qacMasterTableEntry = getModelEntryFromMemory("AR", "MODEL_QAC", "QAC_MASTERTABLE", $qacMasterID); $lemma = $qacMasterTableEntry[0]['FEATURES']['LEM']; // to handle multi segment words such as الدنيا if (empty($lemma)) { $lemma = $qacMasterTableEntry[1]['FEATURES']['LEM']; } //echoN("|$lemma|$wordUthmani"); //$wtwIndex (INDEX_IN_AYA_EMLA2Y) needs to be 1 based ( UTHMANI=IMLA2Y ) $UTHMANI_TO_SIMPLE_LOCATION_MAP[$s + 1 . ":" . 
($a + 1)][$index + 1] = $wtwIndex + 1; $wordSimple = $simpleWordsArr[$wtwIndex++]; //$UTHMANI_TO_SIMPLE_LOCATION_MAP[($s+1).":".($a+1)][($index+1)."-".$wordUthmani]=($wtwIndex)."-".$wordSimple; /* for ayas which are different in size, do the following * if the current word is ويا or ها or يا * then join it with the next word and make them one word */ if (count($uthmaniWordsArr) != count($simpleWordsArr) && ($wordSimple == "يا" || $wordSimple == "ها" || $wordSimple == "ويا" || $wordUthmani == "وَأَلَّوِ")) { if ($wordUthmani == "يَبْنَؤُمَّ") { // example 0 => 1 $UTHMANI_TO_SIMPLE_LOCATION_MAP[$s + 1 . ":" . ($a + 1)][$index + 1] = $wtwIndex + 1; //[($index+1)."-".$wordUthmani]=($wtwIndex+1)."-".$wordSimple; $wordSimple = $wordSimple . " " . $simpleWordsArr[$wtwIndex++] . " " . $simpleWordsArr[$wtwIndex++]; } else { // example 0 => 1 $UTHMANI_TO_SIMPLE_LOCATION_MAP[$s + 1 . ":" . ($a + 1)][$index + 1] = $wtwIndex + 1; //[($index+1)."-".$wordUthmani]=($wtwIndex+1)."-".$wordSimple; $wordSimple = $wordSimple . " " . 
$simpleWordsArr[$wtwIndex++]; } //echoN("$wordUthmani:$wordSimple"); } // printHTMLPageHeader(); // echoN("$wordSimple|$wordUthmani"); ///////// english translation //////// $wordByWordTranslationLine = current($wordByWordFileArr); next($wordByWordFileArr); $linePartsArr = preg_split("/\\|/", $wordByWordTranslationLine); $englishTranslationForCurrentWord = $linePartsArr[5]; ///////////////////////////////////////////////// $WORD_SENSES_EN[$englishTranslationForCurrentWord][$wordUthmani]++; $WORD_SENSES_AR[$wordUthmani][$englishTranslationForCurrentWord]++; $TRANSLATION_MAP_EN_TO_AR[$englishTranslationForCurrentWord] = $wordUthmani; $TRANSLATION_MAP_AR_TO_EN[$wordUthmani] = $englishTranslationForCurrentWord; $TRANSLITERATION_WORDS_MAP[$wordUthmani] = $wordsTransliterationArr[$index]; $clenaedTranliteration = cleanTransliteratedText($wordsTransliterationArr[$index]); $TRANSLITERATION_WORDS_INDEX[$clenaedTranliteration] = 1; $TRANSLITERATION_WORDS_LOCATION_MAP["{$s}:{$a}:{$index}"] = $wordsTransliterationArr[$index]; //preprint_r($TRANSLITERATION_WORDS_LOCATION_MAP); // preprint_r($TRANSLATION_MAP_AR_TO_EN); // preprint_r($TRANSLITERATION_WORDS_MAP); $UTHMANI_TO_SIMPLE_WORD_MAP[$wordUthmani] = $wordSimple; addValueToMemoryModel("AR", "OTHERS", "UTHMANI_TO_SIMPLE_WORD_MAP", $wordUthmani, $wordSimple); $UTHMANI_TO_SIMPLE_WORD_MAP[$wordSimple] = $wordUthmani; addValueToMemoryModel("AR", "OTHERS", "UTHMANI_TO_SIMPLE_WORD_MAP", $wordSimple, $wordUthmani); if (!empty($lemma)) { if (!isset($LEMMA_TO_SIMPLE_WORD_MAP[$lemma])) { $LEMMA_TO_SIMPLE_WORD_MAP[$lemma] = $wordSimple; } else { $oldSimple = $LEMMA_TO_SIMPLE_WORD_MAP[$lemma]; if (myLevensteinEditDistance($oldSimple, $lemma) > myLevensteinEditDistance($wordSimple, $lemma)) { $LEMMA_TO_SIMPLE_WORD_MAP[$lemma] = $wordSimple; } } } } } } /////// ADD UTHMANI TO SIMPLE LOCATION MAP TO MEMORY foreach ($UTHMANI_TO_SIMPLE_LOCATION_MAP as $verseLocation => $verseMappingArr) { /*foreach($mappingArr as $uhtmaniIndex=>$imal2yIndex) 
{ }*/ addValueToMemoryModel("AR", "OTHERS", "UTHMANI_TO_SIMPLE_LOCATION_MAP", $verseLocation, $verseMappingArr); } /////////////////////////////////////////////////////// //preprint_r($TRANSLATION_MAP_EN_TO_AR);exit; //preprint_r($WORD_SENSES_AR);exit; // CAN'T BE ADDED IN THE CORE_MODEL since the mapping happens after loadModel //$res = apc_store("UTHMANI_TO_SIMPLE_WORD_MAP",$UTHMANI_TO_SIMPLE_WORD_MAP); //if ( $res===false){ throw new Exception("Can't cache UTHMANI_TO_SIMPLE_WORD_MAP"); } //$res = apc_store("UTHMANI_TO_SIMPLE_LOCATION_MAP",$UTHMANI_TO_SIMPLE_LOCATION_MAP); //if ( $res===false){ throw new Exception("Can't cache UTHMANI_TO_SIMPLE_LOCATION_MAP"); } $res = apc_store("LEMMA_TO_SIMPLE_WORD_MAP", $LEMMA_TO_SIMPLE_WORD_MAP); if ($res === false) { throw new Exception("Can't cache LEMMA_TO_SIMPLE_WORD_MAP"); } $res = apc_store("WORDS_TRANSLATIONS_EN_AR", $TRANSLATION_MAP_EN_TO_AR); if ($res === false) { throw new Exception("Can't cache WORDS_TRANSLATIONS_EN_AR"); } $res = apc_store("WORDS_TRANSLATIONS_AR_EN", $TRANSLATION_MAP_AR_TO_EN); if ($res === false) { throw new Exception("Can't cache WORDS_TRANSLATIONS_AR_EN"); } $res = apc_store("WORDS_TRANSLITERATION", $TRANSLITERATION_WORDS_MAP); if ($res === false) { throw new Exception("Can't cache WORDS_TRANSLITERATION"); } $res = apc_store("TRANSLITERATION_WORDS_LOCATION_MAP", $TRANSLITERATION_WORDS_LOCATION_MAP); if ($res === false) { throw new Exception("Can't cache TRANSLITERATION_WORDS_LOCATION_MAP"); } $res = apc_store("TRANSLITERATION_VERSES_MAP", $TRANSLITERATION_VERSES_MAP); if ($res === false) { throw new Exception("Can't cache TRANSLITERATION_VERSES_MAP"); } $res = apc_store("TRANSLITERATION_WORDS_INDEX", $TRANSLITERATION_WORDS_INDEX); if ($res === false) { throw new Exception("Can't cache TRANSLITERATION_WORDS_INDEX"); } $res = apc_store("WORD_SENSES_EN", $WORD_SENSES_EN); if ($res === false) { throw new Exception("Can't cache WORD_SENSES_EN"); } $res = apc_store("WORD_SENSES_AR", $WORD_SENSES_AR); 
if ($res === false) { throw new Exception("Can't cache {$WORD_SENSES_AR}"); } //// ENRICH INVERTED INDEX BY UTHMANI-EMLA2Y INDEXES //echoN(count($MODEL_SEARCH['AR']['INVERTED_INDEX'])); foreach (getAPCIterator("AR\\/MODEL_SEARCH\\/INVERTED_INDEX\\/.*") as $invertedIndexCursor) { $wordDataArr = $invertedIndexCursor['value']; $key = $invertedIndexCursor['key']; $word = getEntryKeyFromAPCKey($key); foreach ($wordDataArr as $index => $documentArrInIndex) { $WORD_TYPE = $documentArrInIndex['WORD_TYPE']; $SURA = $documentArrInIndex['SURA']; $AYA = $documentArrInIndex['AYA']; //echoN($word." ".$WORD_TYPE); if ($WORD_TYPE == "NORMAL_WORD") { $INDEX_IN_AYA_EMLA2Y = $documentArrInIndex['INDEX_IN_AYA_EMLA2Y']; foreach ($UTHMANI_TO_SIMPLE_LOCATION_MAP[$SURA + 1 . ":" . ($AYA + 1)] as $uhtmaniIndex => $imal2yIndex) { if ($imal2yIndex == $INDEX_IN_AYA_EMLA2Y) { $INDEX_IN_AYA_UTHMANI = $uhtmaniIndex; break; } } //echoN($INDEX_IN_AYA_UTHMANI); $wordDataArr[$index]['INDEX_IN_AYA_UTHMANI'] = $INDEX_IN_AYA_UTHMANI; } else { // needed for highlighting pronoun charcters in search $INDEX_IN_AYA_UTHMANI = $documentArrInIndex['INDEX_IN_AYA_UTHMANI']; $INDEX_IN_AYA_EMLA2Y = getSimpleWordIndexByUthmaniWordIndex($SURA + 1 . ":" . 
($AYA + 1), $INDEX_IN_AYA_UTHMANI); $wordDataArr[$index]['INDEX_IN_AYA_EMLA2Y'] = $INDEX_IN_AYA_EMLA2Y; } } //UPDATE updateModelData($key, $wordDataArr); } //$res = apc_store("MODEL_SEARCH[AR]",$MODEL_SEARCH['AR']); //if ( $res===false){ throw new Exception("Can't cache MODEL_SEARCH[AR]"); } //preprint_r($TRANSLITERATION_WORDS_LOCATION_MAP); /// ADD TRANSLITERATION TO INVERETD INDEX WWITH ENGLISH WORDS if ($lang == "EN") { $invertedIndexBatchApcArr = array(); foreach ($TRANSLITERATION_WORDS_LOCATION_MAP as $location => $transliteratedWord) { $locationArr = explode(":", $location); $s = $locationArr[0]; $a = $locationArr[1]; $wordIndex = $locationArr[2]; //echoN("$transliteratedWord,$s,$a,$wordIndex"); $transliteratedWord = strtolower(strip_tags($transliteratedWord)); //$MODEL_SEARCH['EN']['INVERTED_INDEX'][$word] addToInvertedIndex($invertedIndexBatchApcArr, $lang, $transliteratedWord, $s, $a, $wordIndex, "NORMAL_WORD"); } addToMemoryModelBatch($invertedIndexBatchApcArr); //$res = apc_store("MODEL_SEARCH[EN]",$MODEL_SEARCH['EN']); } //if ( $res===false){ throw new Exception("Can't cache MODEL_SEARCH[EN]"); } ///////////////////////////////////////////////////////// //preprint_r($UTHMANI_TO_SIMPLE_WORD_MAP); //preprint_r($MODEL_CORE["AR_UTH"]['QURAN_TEXT']);exit; ############################################################## // get memory usage $debug = memory_get_usage(true) / 1024 / 1024 . "/" . memory_get_peak_usage(true) / 1024 / 1024 . "Memory <br>"; //echoN($debug); //needed to be set here after both languages has been loaded // reload all models from memory to set all variables (WORDNET) - after model generation /* needed to reload all generated models from memory specialy model_core since * it has 3 languages, if this line is removed: all 3 langauges are loaded although only one language * is requested, also it caused a bug in getPoSTaggedSubsentences */ //loadModels($modelsToBeLoaded,$lang); }
/**
 * Prints the HTML markup for every verse of a search result and returns the
 * plain (un-highlighted) text of those verses.
 *
 * For each entry of $scoringTable the verse text is loaded from the in-memory
 * QURAN_TEXT model, matched words / pronouns / prospective answer words are
 * wrapped in marker tags, and one "result-aya-container" block is echoed.
 *
 * @param array      $scoringTable                Result entries keyed by document ID. Each entry is read for
 *                                                'SURA' and 'AYA' (0-based indexes into QURAN_TEXT), 'WORD_TYPE',
 *                                                'POSSIBLE_HIGHLIGHTABLE_WORDS' (words as array keys) and
 *                                                'PRONOUNS' (pronoun text => 1-based word index in the verse).
 * @param string     $lang                        "EN" or "AR"; selects the text model and the labels.
 * @param string     $direction                   CSS direction value echoed into the markup.
 * @param string     $query                       The user query; highlighted as a whole in phrase searches.
 * @param bool       $isPhraseSearch              Highlight $query itself instead of the individual words.
 * @param bool       $isQuestion                  When true, $significantCollocationWords are marked as prospective answers.
 * @param string     $script                      "simple" or "uthmani" (see the note on `global $script` below).
 * @param array|null $significantCollocationWords Word => frequency map used only when $isQuestion is true.
 * @param bool       $isTransliterationSearch     Also print (and highlight) the transliterated verse.
 *
 * @return array The verse texts, in result order, as stored in QURAN_TEXT (before highlighting).
 */
function printResultVerses($scoringTable, $lang, $direction, $query, $isPhraseSearch, $isQuestion, $script, $significantCollocationWords = null, $isTransliterationSearch = false)
{
    // NOTE(review): `global $script` rebinds the $script parameter to the global
    // variable of the same name, so the value passed by the caller is not the one
    // used below. Kept unchanged to preserve behaviour - confirm which is intended.
    global $script, $TRANSLITERATION_VERSES_MAP;

    $searchResultsTextArr = array();

    // Nothing to render.
    if (empty($scoringTable)) {
        return $searchResultsTextArr;
    }

    $QURAN_TEXT = getModelEntryFromMemory($lang, "MODEL_CORE", "QURAN_TEXT", "");
    $QURAN_TEXT_UTH = getModelEntryFromMemory("AR_UTH", "MODEL_CORE", "QURAN_TEXT", "");
    $META_DATA = getModelEntryFromMemory($lang, "MODEL_CORE", "META_DATA", "");

    // The "translation" shown under each verse is the text in the other language:
    // Arabic (simple or Uthmani script) for English results, English for Arabic ones.
    if ($lang != "EN") {
        $QURAN_TEXT_OTHER_LANG = getModelEntryFromMemory("EN", "MODEL_CORE", "QURAN_TEXT", "");
    } elseif ($script == "simple") {
        $QURAN_TEXT_OTHER_LANG = getModelEntryFromMemory("AR", "MODEL_CORE", "QURAN_TEXT", "");
    } else {
        $QURAN_TEXT_OTHER_LANG = $QURAN_TEXT_UTH;
    }

    // Counts down from (number of results - 1) to 0 so the first result gets the
    // highest "relevance" attribute. (Previously counted an undefined variable.)
    $relevanceReverseOrderIndex = count($scoringTable);

    foreach ($scoringTable as $documentID => $documentScoreArr) {
        $relevanceReverseOrderIndex--;

        $SURA = $documentScoreArr['SURA'];
        $AYA = $documentScoreArr['AYA'];

        // 1-based "sura:aya" location used for display and for the transliteration map.
        $verseLocation = ($SURA + 1) . ":" . ($AYA + 1);

        $TEXT = $QURAN_TEXT[$SURA][$AYA];
        $TEXT_UTH = $QURAN_TEXT_UTH[$SURA][$AYA];
        $TEXT_TRANSLATED = $QURAN_TEXT_OTHER_LANG[$SURA][$AYA];

        $WORD_TYPE = isset($documentScoreArr['WORD_TYPE']) ? $documentScoreArr['WORD_TYPE'] : "";
        $WORDS_IN_AYA = isset($documentScoreArr['POSSIBLE_HIGHLIGHTABLE_WORDS']) ? $documentScoreArr['POSSIBLE_HIGHLIGHTABLE_WORDS'] : array();
        $PRONOUNS = isset($documentScoreArr['PRONOUNS']) ? $documentScoreArr['PRONOUNS'] : array();

        // The transliteration is only printed for transliteration searches.
        $TEXT_TRANSLITERATED = "";
        if ($isTransliterationSearch) {
            $TEXT_TRANSLITERATED = cleanTransliteratedText(
                isset($TRANSLITERATION_VERSES_MAP[$verseLocation]) ? $TRANSLITERATION_VERSES_MAP[$verseLocation] : ""
            );
        }

        // The returned list holds the raw text, so collect it before any markup is added.
        $searchResultsTextArr[] = $TEXT;

        $SURA_NAME = $META_DATA['SURAS'][$SURA]['name_' . strtolower($lang)];
        $SURA_NAME_LATIN = $META_DATA['SURAS'][$SURA]['name_trans'];

        $MATCH_TYPE = "";
        if ($WORD_TYPE == "PRONOUN_ANTECEDENT") {
            $MATCH_TYPE = ($lang == "EN") ? "pronoun" : "ضمير";
        } elseif ($WORD_TYPE == "ROOT" || $WORD_TYPE == "LEM") {
            $MATCH_TYPE = "تصريف / إشتقاق";
        }

        // Mark the matched words. The list is empty when only pronouns matched.
        if (!empty($WORDS_IN_AYA)) {
            if ($isPhraseSearch) {
                // Quote the query so regex metacharacters in user input are matched literally.
                if ((string) $query !== "") {
                    $TEXT = preg_replace("/(" . preg_quote($query, "/") . ")/mui", "<marked>\\1</marked>", $TEXT);
                }
            } elseif (is_array($WORDS_IN_AYA)) {
                $quotedWordsArr = array();
                foreach (array_keys($WORDS_IN_AYA) as $highlightableWord) {
                    $highlightableWord = (string) $highlightableWord;
                    // An empty alternative would match at every position of the text.
                    if ($highlightableWord !== "") {
                        $quotedWordsArr[] = preg_quote($highlightableWord, "/");
                    }
                }

                if (!empty($quotedWordsArr)) {
                    $wordsPattern = "/(" . join("|", $quotedWordsArr) . ")/mui";
                    $TEXT = preg_replace($wordsPattern, "<marked>\\1</marked>", $TEXT);

                    if ($isTransliterationSearch) {
                        $TEXT_TRANSLITERATED = preg_replace($wordsPattern, "<marked>\\1</marked>", $TEXT_TRANSLITERATED);
                    }
                }
            }
        }

        // Mark pronouns. Not restricted to PRONOUN_ANTECEDENT results because a
        // result may contain both pronouns and normal words.
        if (is_array($PRONOUNS)) {
            foreach ($PRONOUNS as $pronounText => $PRONOUN_INDEX_IN_AYA_EMLA2Y) {
                $pronounText = removeTashkeel($pronounText);
                $TEXT = markSpecificWordInText($TEXT, $PRONOUN_INDEX_IN_AYA_EMLA2Y - 1, $pronounText, "marked");
            }
        }

        // For questions, mark the words that are prospective answers.
        if ($isQuestion && is_array($significantCollocationWords)) {
            foreach ($significantCollocationWords as $word => $freq) {
                $TEXT = markWordWithoutWordIndex($TEXT, $word, "marked_prospect_answer");
            }
        }

        // Element ids use '-' instead of ':'.
        $documentID = str_replace(":", "-", $documentID);

        $showTransText = ($lang == "EN") ? "Show Arabic" : "Show Translation";

        // Attribute values are echoed without trailing whitespace so that the ids
        // (and the id passed to showTranslationFor) are valid and consistent.
        ?>
        <div class='result-aya-container' order='<?php echo $SURA + 1; ?>' relevance='<?php echo $relevanceReverseOrderIndex; ?>'>
            <div class='result-aya' style="direction:<?php echo $direction; ?>" id="<?php echo $documentID; ?>">
                <?php
                if ($script == "uthmani" && $lang == "AR") {
                    echo $TEXT_UTH;
                } else {
                    echo $TEXT;
                    if ($isTransliterationSearch) {
                        echo "<hr class='transliteration-separator'/>";
                        echo "<div class='transliteration-verse-text-area'>{$TEXT_TRANSLITERATED}</div>";
                    }
                }
                ?>
                <div id="<?php echo $documentID; ?>-translation" class='result-translated-text' style="direction:<?php echo $lang == "AR" ? "ltr" : "rtl"; ?>">
                    <?php echo $TEXT_TRANSLATED; ?>
                </div>
            </div>
            <div class='result-aya-info'>
                <span class='result-sura-info' style="direction:<?php echo $direction; ?>">
                    <?php
                    echo $SURA_NAME;
                    if ($lang == "EN") {
                        echo " ({$SURA_NAME_LATIN})";
                    }
                    echo " [{$verseLocation}]";
                    ?>
                </span>
                <span class='result-aya-showtranslation'>
                    <a href="javascript:showTranslationFor('<?php echo $documentID; ?>')"><?php echo $showTransText; ?></a>
                </span>
                <span class='result-more-about-aya'>
                    <a target='_new' href='http://quran.com/<?php echo ($SURA + 1) . "/" . ($AYA + 1); ?>'> More </a>
                </span>
                <span class='result-match-type'>
                    <?php echo $MATCH_TYPE; ?>
                </span>
            </div>
        </div>
        <?php
    }

    return $searchResultsTextArr;
}
// Walks every QAC part-of-speech entry held in memory (keys matching
// AR/MODEL_QAC/QAC_POS/*) and, for each tagged segment, looks up its word form,
// lemma and root in the QAC master table.
// NOTE(review): this excerpt ends inside the two foreach loops opened below;
// their remaining statements and closing braces are not visible here.
//$qacMasterTableEntryArr2 = getModelEntryFromMemory("AR","MODEL_QAC","QAC_POS",$qacLocation);
$qacPoSTagsIterator = getAPCIterator("AR\\/MODEL_QAC\\/QAC_POS\\/.*");
$QURAN_TEXT = getModelEntryFromMemory("AR", "MODEL_CORE", "QURAN_TEXT", "");
$TOTALS = getModelEntryFromMemory("AR", "MODEL_CORE", "TOTALS", "");
$PAUSEMARKS = $TOTALS['PAUSEMARKS'];
// NOTE(review): this prints the pause marks unconditionally - looks like leftover debug output; confirm.
preprint_r($PAUSEMARKS);
foreach ($qacPoSTagsIterator as $qacPoSTagsIteratorCursor) {
    // Each cursor carries the stored value and its full memory key; the PoS tag
    // is extracted from the key.
    $POS_ARR = $qacPoSTagsIteratorCursor['value'];
    $key = $qacPoSTagsIteratorCursor['key'];
    $POS = getEntryKeyFromAPCKey($key);
    // Noun, proper-noun and adjective tags are not processed by this loop.
    if ($POS == "N" || $POS == "PN" || $POS == "ADJ") {
        continue;
    }
    //echoN("|$POS|");
    // $POS_ARR maps a word location to the id of the segment carrying this tag.
    foreach ($POS_ARR as $location => $segmentId) {
        $qacMasterTableEntry = getModelEntryFromMemory("AR", "MODEL_QAC", "QAC_MASTERTABLE", $location);
        // Get the word form, lemma and root of the tagged segment
        // ($segmentId is 1-based here, the master-table entry is indexed from 0).
        $segmentWord = $qacMasterTableEntry[$segmentId - 1]['FORM_AR'];
        $segmentWordLema = $qacMasterTableEntry[$segmentId - 1]['FEATURES']['LEM'];
        $segmentWordRoot = $qacMasterTableEntry[$segmentId - 1]['FEATURES']['ROOT'];
        // Drops the last two characters of the location.
        // NOTE(review): presumably this strips a trailing ":<word index>"; that only
        // holds for single-digit word indexes - confirm.
        $verseLocation = substr($location, 0, strlen($location) - 2);
        //$segmentWord = removeTashkeel($segmentWord);
        if ($POS == "DET") {
            // PoS tag of the segment that follows the determiner
            $segmentPoStag = $qacMasterTableEntry[$segmentId]['TAG'];
            // number of segments in the word
            $numberOfSegmentsInWord = count($qacMasterTableEntry);
            // Skip a determiner that is followed by a noun/adjective in a two-segment word.
            if (($segmentPoStag == "N" || $segmentPoStag == "ADJ") && $numberOfSegmentsInWord == 2) {
                continue;
            }
        }
/**
 * Finds the QA-ontology concepts that are mentioned, word by word, in a text.
 *
 * The text is split on single spaces. Each word is mapped to a concept ID
 * (through the English-to-Arabic concept name map for "EN", through
 * convertWordToConceptID() otherwise); words whose ID exists in the ontology's
 * CONCEPTS table are turned into concept objects with createNewConceptObj().
 * The ontology's root "thing" class is never reported.
 *
 * @param string $text Text to scan; only single-word concepts can be found.
 * @param string $lang "EN" for English text; any other value takes the non-English path.
 *
 * @return array Concept objects keyed by concept ID (a repeated concept keeps one entry).
 */
function getConceptsFoundInText($text, $lang)
{
    global $thing_class_name_ar;

    // createNewConceptObj() takes a node serial number and x/y locations that this
    // function has no meaningful values for. They used to be passed as undefined
    // variables (null plus an "undefined variable" warning); they are now defined
    // explicitly. Variables are passed rather than literals in case the callee
    // takes them by reference - TODO confirm against createNewConceptObj().
    $nodeSerialNumber = null;
    $randomXLocation = null;
    $randomYLocation = null;

    $conceptsInTextArr = array();

    foreach (explode(" ", $text) as $word) {
        if ($lang == "EN") {
            $word = strtolower(cleanAndTrim($word));
            if ($word === "") {
                continue;
            }
            // Translate the English name to the Arabic concept name/id.
            $conceptID = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS_EN_AR_NAME_MAP", $word);
        } else {
            if ($word === "") {
                continue;
            }
            $conceptID = convertWordToConceptID($word);
        }

        // No concept ID resolved for this word: nothing to look up.
        if ($conceptID === null || $conceptID === false || $conceptID === "") {
            continue;
        }

        // The root "thing" class is not a useful match; skip it before fetching anything.
        if ($conceptID == $thing_class_name_ar) {
            continue;
        }

        if (!modelEntryExistsInMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $conceptID)) {
            continue;
        }

        $mainConceptArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $conceptID);

        // The node label follows the language of the text.
        $finalNodeLabel = ($lang == "EN") ? $mainConceptArr['label_en'] : $mainConceptArr['label_ar'];

        $conceptsInTextArr[$conceptID] = createNewConceptObj($nodeSerialNumber, $lang, $finalNodeLabel, $mainConceptArr, $randomXLocation, $randomYLocation, 1);
    }

    return $conceptsInTextArr;
}
/**
 * Returns the first element of the English PoS lexicon entry stored for a token.
 *
 * The token is lower-cased before the lookup in the EN/PHPIR_LEXICON/POS_ENTRY
 * table.
 *
 * @param string $token Word to look up (case-insensitive).
 *
 * @return mixed The first element of the lexicon entry, or null when the token
 *               has no entry (or the entry has no first element).
 */
function getLexiconItemFromMemory($token)
{
    $entry = getModelEntryFromMemory("EN", "PHPIR_LEXICON", "POS_ENTRY", strtolower($token));

    // A token missing from the lexicon yields no entry; return null explicitly
    // instead of reading offset 0 of a non-array (which raises a warning).
    return isset($entry[0]) ? $entry[0] : null;
}
// Expand the cluster tree starting from the root node.
addChildrenToCluster($clusteredArr, $treeRootNodeObj, $clusterSerialNumber, $nodeSerialNumber, 1, $handledBefore);

// Keep only the cluster items whose concept is either found in the search index
// or has subclasses in the ontology; everything else is dropped from the output.
$filteredClusteredArr = array();
$qaRelationsArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "RELATIONS", "");

$index = 0;

foreach ($clusteredArr as $index => $clusterArrItem) {
    $conceptName = strtolower(convertConceptIDtoGraphLabel($clusterArrItem['word']));

    // Arabic concept name registered for this (lower-cased) graph label.
    $conceptNameAR = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS_EN_AR_NAME_MAP", $conceptName);

    // The subclass check only runs when the concept is not in the index.
    if (wordOrPhraseIsInIndex($lang, $conceptName) || conceptHasSubclasses($qaRelationsArr, $conceptNameAR)) {
        $index++;
        $filteredClusteredArr[] = $clusterArrItem;
    }
}

// JSON form of the filtered clusters.
$clusteredArrJSON = json_encode($filteredClusteredArr);
?>