#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
#    You can use Quran Analysis code, framework or corpora in your website
#	 or application (commercial/non-commercial) provided that you link
#    back to www.qurananalysis.com and sufficient credits are given.
#
#  ====================================================================
require_once "../global.settings.php";
require_once "../libs/core.lib.php";
loadModels("core,qac", "AR");
$LEMMA_TO_SIMPLE_WORD_MAP = loadLemmaToSimpleMappingTable();
printHTMLPageHeader();
//$qacMasterTableEntryArr2 = getModelEntryFromMemory("AR","MODEL_QAC","QAC_POS",$qacLocation);
$qacPoSTagsIterator = getAPCIterator("AR\\/MODEL_QAC\\/QAC_POS\\/.*");
$QURAN_TEXT = getModelEntryFromMemory("AR", "MODEL_CORE", "QURAN_TEXT", "");
$TOTALS = getModelEntryFromMemory("AR", "MODEL_CORE", "TOTALS", "");
$PAUSEMARKS = $TOTALS['PAUSEMARKS'];
preprint_r($PAUSEMARKS);
foreach ($qacPoSTagsIterator as $qacPoSTagsIteratorCursor) {
    $POS_ARR = $qacPoSTagsIteratorCursor['value'];
    $key = $qacPoSTagsIteratorCursor['key'];
    $POS = getEntryKeyFromAPCKey($key);
    if ($POS == "N" || $POS == "PN" || $POS == "ADJ") {
        continue;
    }
    //echoN("|$POS|");
    foreach ($POS_ARR as $location => $segmentId) {
        $qacMasterTableEntry = getModelEntryFromMemory("AR", "MODEL_QAC", "QAC_MASTERTABLE", $location);
        // get Word, Lema and root
			  			
			  		</div>
			  		<table id='uts-mapping-table'>
							<tr>
							<th>
								Uthmani
							</th>
							<th>
								Simple
							</th>
							</tr>
							
							<?php 
$uthmaniCounter = 0;
$simpleCounter = 0;
$qaOntologyConceptsIterator = getAPCIterator("AR\\/OTHERS\\/UTHMANI_TO_SIMPLE_WORD_MAP\\/.*");
foreach ($qaOntologyConceptsIterator as $conceptsCursor) {
    $mapTermKey = getEntryKeyFromAPCKey($conceptsCursor['key']);
    $mapTermVal = $conceptsCursor['value'];
    if (isSimpleQuranWord($mapTermKey)) {
        $simpleCounter++;
        //echoN("##".$mapTermKey);
        continue;
    }
    $uthmaniCounter++;
    ?>
								<tr>
									<td><?php 
    echo $mapTermKey;
    ?>
</td>
Exemplo n.º 3
0
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
#    You can use Quran Analysis code, framework or corpora in your website
#	 or application (commercial/non-commercial) provided that you link
#    back to www.qurananalysis.com and sufficient credits are given.
#
#  ====================================================================
require_once "../global.settings.php";
$lang = "AR";
loadModels("", $lang);
$QURAN_TEXT = getModelEntryFromMemory($lang, "MODEL_CORE", "QURAN_TEXT", "");
preprint_r($QURAN_TEXT[1][1]);
$location = "1:1:1";
$qacMasterTableEntry = getModelEntryFromMemory("AR", "MODEL_QAC", "QAC_MASTERTABLE", $location);
preprint_r($qacMasterTableEntry);
$qaOntologyConceptsIterator = getAPCIterator("ALL\\/MODEL_QA_ONTOLOGY\\/CONCEPTS\\/.*");
foreach ($qaOntologyConceptsIterator as $conceptsCursor) {
    $conceptNameID = getEntryKeyFromAPCKey($conceptsCursor['key']);
    $conceptArr = $conceptsCursor['value'];
    $conceptLabelAR = $conceptArr['label_ar'];
    $conceptLabelEN = $conceptArr['label_en'];
    $conceptFrequency = $conceptArr['frequency'];
    $conceptWeight = $conceptArr['weight'];
    preprint_r($conceptArr);
    break;
    //only one concept
}
// print all words in wordnet
preprint_r(array_keys($MODEL_WORDNET['INDEX']));
// get all information about "Egypt" from wordnet
$wordNetEntry = getWordnetEntryByWordString("egypt");
Exemplo n.º 4
0
function isWordPartOfAVerbInVerbIndex($word, $lang)
{
    $verbIndexIterator = getAPCIterator("ALL\\/MODEL_QA_ONTOLOGY\\/VERB_INDEX\\/.*");
    foreach ($verbIndexIterator as $verbIndexCursor) {
        $verbWord = getEntryKeyFromAPCKey($verbIndexCursor['key']);
        $verbArr = $verbIndexCursor['value'];
        if ($lang == "EN") {
            $verbWord = strtolower($verbWord);
        }
        if (mb_strpos($verbWord, $word) !== false) {
            //echoN("|$verbWord| |$word|".( mb_strpos($verbWord, $word)!==false));
            return $verbArr;
        }
    }
    return false;
}
Exemplo n.º 5
0
function ontologyToD3TreemapHierarchical($MODEL_QA_ONTOLOGY, $minFreq = 0, $lang)
{
    global $lang;
    $alreadyInLevel1 = array();
    $treeRootObj = array();
    $treeRootObj["name"] = "قرآن";
    if ($lang == "EN") {
        $treeRootObj["name"] = "Quran";
    }
    $treeRootObj["children"] = array();
    /** SHOULD BE ZERO BASED FOR D3 TO WORK - o.target.weight = NULL**/
    $nodeSerialNumber = 0;
    $qaOntologyConceptsIterator = getAPCIterator("ALL\\/MODEL_QA_ONTOLOGY\\/CONCEPTS\\/.*");
    foreach ($qaOntologyConceptsIterator as $conceptsCursor) {
        $conceptNameID = getEntryKeyFromAPCKey($conceptsCursor['key']);
        $conceptArr = $conceptsCursor['value'];
        $conceptLabelAR = $conceptArr['label_ar'];
        $conceptLabelEN = $conceptArr['label_en'];
        $conceptFrequency = $conceptArr['frequency'];
        $conceptWeight = $conceptArr['weight'];
        $type = $conceptArr['type'];
        if ($conceptFrequency < $minFreq) {
            continue;
        }
        //ONLY CLASSES (CLUSTERS)
        if ($type == "class") {
            $alreadyInLevel1[$conceptNameID] = 1;
            //echoN($conceptLabelAR);
            //echoN($conceptNameID);
            if ($lang == "EN") {
                $conceptNameClean = convertConceptIDtoGraphLabel($conceptLabelEN);
            } else {
                $conceptNameClean = convertConceptIDtoGraphLabel($conceptLabelAR);
            }
            /*= array("id"=>$nodeSerialNumber++,"word"=>$conceptLabelAR,
            	 "size"=>$conceptWeight,"x"=>rand($startLocationXMin,$startLocationXMax),
            			"y"=>rand($startLocationYMin,$startLocationYMax));*/
            $treeRootObj["children"][] = array("name" => $conceptNameClean, "size" => $conceptFrequency, "children" => getTreeNodeChildren($MODEL_QA_ONTOLOGY, $conceptNameID, $minFreq, $lang, 1, $alreadyInLevel1));
        }
    }
    /*
    	foreach($MODEL_QA_ONTOLOGY['RELATIONS'] as $index => $relArr)
    	{
    
    		$subject = $relArr['subject'];
    		$verbAR = $relArr['verb'];
    		$verbEN = $relArr['VERB_TRANSLATION_EN'];
    		$verbUthmani = $relArr['verb_uthmani'];
    		$relFreq = $relArr['frequency'];
    		$object = $relArr['object'];
    			
    
    		//$treeRootObj[$subject]["children"][]["name"]=$object;
    
    		$objectConceptArr = $MODEL_QA_ONTOLOGY['CONCEPTS'][$object];
    
    			
    		$index = search2DArrayForValue($currentArr,$subject);
    
    
    		$isObjectIncludedBefore = search2DArrayForValue($currentArr[$index]["children"],$object);
    
    		if ( $isObjectIncludedBefore===false)
    		{
    			//$currentArr[$index]["children"][] = array("name"=>$object,"size"=>$objectConceptArr['frequency'],"children"=>array());
    		}
    
    	}*/
    //echoN(count($treeRootObj["children"]));
    return $treeRootObj;
}
Exemplo n.º 6
0
function loadModels($modelsToBeLoaded, $lang)
{
    global $modelSources, $serializedModelFile, $quranMetaDataFile, $META_DATA, $MODEL_CORE, $MODEL_SEARCH, $MODEL_QAC, $MODEL_QURANA;
    global $UTHMANI_TO_SIMPLE_WORD_MAP, $numberOfSuras, $pauseMarksFile;
    global $TRANSLATION_MAP_EN_TO_AR, $TRANSLATION_MAP_AR_TO_EN, $TRANSLITERATION_WORDS_MAP, $TRANSLITERATION_VERSES_MAP;
    global $wordByWordTranslationFile, $transliterationFile;
    global $MODEL_WORDNET, $qaOntologyNamespace, $qaOntologyFile, $is_a_relation_name_ar, $is_a_relation_name_en;
    global $thing_class_name_ar, $thing_class_name_en;
    global $MODEL_QA_ONTOLOGY, $arabicStopWordsFileL2;
    global $TRANSLITERATION_WORDS_LOCATION_MAP, $TRANSLITERATION_WORDS_INDEX;
    //not working
    gc_enable();
    if (!function_exists("apc_exists")) {
        throw new Exception("APC not found!");
    }
    //echoN("MODEL EXISTS IN CACHE?:".apc_exists("EN/MODEL_CORE/TOTALS/"));
    ##### CHECK MODEL IN CACHE ##### #####
    if (TRUE && apc_exists("EN/MODEL_CORE/TOTALS/") !== false) {
        // split list by comma
        $modelListArr = preg_split("/,/", trim($modelsToBeLoaded));
        /**
         * TODO: CHANGE THE CODE TO REFERENCE APC MEMORY DIRECTLY INSTEAD OF LOADING DATA IN EACH SCRIPT
         */
        foreach ($modelListArr as $modelName) {
            //echoN("$modelName $lang ".time());
            //	echoN(memory_get_peak_usage());
            //echoN($modelName);
            if ($modelName == "ontology") {
                /*$MODEL_QA_ONTOLOGY =  apc_fetch("MODEL_QA_ONTOLOGY");
                		
                		
                		
                		if ($MODEL_QA_ONTOLOGY===false )
                		{
                			echo "$MODEL_QA_ONTOLOGY NOT CACHED";exit;
                		}
                		*/
            }
            if ($modelName == "wordnet") {
            }
            if ($modelName == "core") {
                //$MODEL_CORE = json_decode((file_get_contents("$serializedModelFile.core")),TRUE);
                /*$MODEL_CORE  = apc_fetch("MODEL_CORE[$lang]");
                		
                		
                		if ($MODEL_CORE===false )
                		{
                			echo "CORE MODEL [$lang] NOT CACHED";exit;
                		}*/
            } else {
                if ($modelName == "search") {
                    //$MODEL_SEARCH = json_decode((file_get_contents("$serializedModelFile.search")),TRUE);
                    //$MODEL_SEARCH  = apc_fetch("MODEL_SEARCH[$lang]");
                    /*if ($MODEL_SEARCH===false )
                    		{
                    			echo "SEARCH MODEL [$lang] NOT CACHED";exit;
                    		}*/
                } else {
                    if ($modelName == "qac") {
                        //$MODEL_QAC = json_decode((file_get_contents("$serializedModelFile.qac")),TRUE);
                        /*$MODEL_QAC  = apc_fetch("MODEL_QAC");
                        		
                        		
                        		if ($MODEL_QAC===false )
                        		{
                        			echo "QAC MODEL NOT CACHED";exit;
                        		}
                        		*/
                    }
                }
            }
        }
        $MODEL_WORDNET['INDEX'] = apc_fetch("WORDNET_INDEX");
        if ($MODEL_WORDNET['INDEX'] === false) {
            echo "MODEL_WORDNET['INDEX'] NOT CACHED";
            exit;
        }
        $MODEL_WORDNET['LEXICO_SEMANTIC_CATEGORIES'] = apc_fetch("WORDNET_LEXICO_SEMANTIC_CATEGORIES");
        if ($MODEL_WORDNET['LEXICO_SEMANTIC_CATEGORIES'] === false) {
            echo " MODEL MODEL_WORDNET['LEXICO_SEMANTIC_CATEGORIES'] NOT CACHED";
            exit;
        }
        $MODEL_WORDNET['DATA'] = apc_fetch("WORDNET_DATA");
        if ($MODEL_WORDNET['DATA'] === false) {
            echo "MODEL MODEL_WORDNET['DATA'] NOT CACHED";
            exit;
        }
        //else if ( ($modelName=="qurana"))
        //{
        //$MODEL_QURANA = json_decode((file_get_contents("$serializedModelFile.qurana")),TRUE);
        $MODEL_QURANA = apc_fetch("MODEL_QURANA");
        if ($MODEL_QURANA === false) {
            echo "QURANA MODEL NOT CACHED";
            exit;
        }
        //}
        return;
    }
    ########## ##### ##### ##### ##### #####
    //$quran = file($quranMetaDataFile);
    $quranMetaDataXMLObj = simplexml_load_file($quranMetaDataFile);
    ###### CONVERT META XML STRUCUTURE TO OUR STRUCTURE
    foreach ($quranMetaDataXMLObj->suras as $index => $surasArr) {
        foreach ($surasArr->sura as $suraMetaArr) {
            $tempArr = array();
            $tempArr['index'] = (string) $suraMetaArr['index'];
            $tempArr['ayas'] = (string) $suraMetaArr['ayas'];
            $tempArr['name_ar'] = (string) $suraMetaArr['name'];
            $tempArr['name_trans'] = (string) $suraMetaArr['tname'];
            $tempArr['name_en'] = (string) $suraMetaArr['ename'];
            $tempArr['type'] = (string) $suraMetaArr['type'];
            $tempArr['order'] = (string) $suraMetaArr['order'];
            $META_DATA['SURAS'][] = $tempArr;
        }
    }
    ##############################################
    /////////// LOAD ONTOLOGY
    $reader = new OWLReader();
    $ontology = new OWLMemoryOntology();
    $thingClassName = "{$thing_class_name_ar}";
    $ontology->setNamespace($qaOntologyNamespace);
    $reader->readFromFile($qaOntologyFile, $ontology);
    //preprint_r($ontology->{'owl_data'}['classes']);
    //preprint_r($ontology->{'owl_data'}['properties']);
    //preprint_r($ontology->{'owl_data'}['labels']);
    //preprint_r($ontology->{'owl_data'}['annotations']);
    //preprint_r($ontology->{'owl_data'}['instances']);
    $classes = $ontology->{'owl_data'}['classes'];
    $instances = $ontology->{'owl_data'}['instances'];
    $qaOntologyConceptsArr = array();
    $qaOntologyRelationsArr = array();
    $relationsCount = 0;
    foreach ($classes as $className => $infoArr) {
        $className = stripOntologyNamespace($className);
        $qaOntologyConceptsArr[$className] = array("type" => "class");
        //echoN($className);
        //preprint_r($infoArr);
        foreach ($infoArr[0]['properties'] as $index => $propertiesArr) {
            /** INCASE THIS INSTANCE HAS MULTIPLE PROPERTIES WITH SAME VERB **/
            foreach ($propertiesArr as $index2 => $onePropertyArr) {
                if (empty($onePropertyArr)) {
                    continue;
                }
                $verb = key($onePropertyArr);
                $objectClassArr = current($onePropertyArr);
                $objectConceptName = stripOntologyNamespace($objectClassArr[0]);
                //echoN("CLASS:***** $className => $verb -> $objectConceptName");
                $attributedArr = next($onePropertyArr);
                $freq = $attributedArr['frequency'];
                $engTranslation = $attributedArr['verb_translation_en'];
                $verbUthmani = $attributedArr['verb_uthmani'];
                $relHashID = buildRelationHashID($className, $verb, $objectConceptName);
                $qaOntologyRelationsArr[$relHashID] = array("SUBJECT" => $className, "VERB" => $verb, "OBJECT" => $objectConceptName, "FREQUENCY" => $freq, "VERB_TRANSLATION_EN" => $engTranslation, "VERB_UTHMANI" => $verbUthmani);
                //preprint_r($qaOntologyRelationsArr[$relHashID]);
                $relationsCount++;
            }
        }
    }
    foreach ($instances as $instanceName => $intancesArr) {
        foreach ($intancesArr as $index => $infoArr) {
            $subjectConceptName = stripOntologyNamespace($instanceName);
            $parent = stripOntologyNamespace($infoArr['class']);
            //echoN("$subjectConceptName $parent");
            $relHashID = buildRelationHashID($subjectConceptName, $is_a_relation_name_ar, $parent);
            $qaOntologyRelationsArr[$relHashID] = array("SUBJECT" => $subjectConceptName, "VERB" => "{$is_a_relation_name_ar}", "OBJECT" => $parent, "VERB_TRANSLATION_EN" => "{$is_a_relation_name_en}");
            if ($parent != $thing_class_name_ar) {
                $relationsCount++;
            }
            $propertiesArr = $infoArr['properties'];
            //echoN($instanceName);
            //echoN("$instanceName:@@@");
            //preprint_r($propertiesArr);
            /** INCASE THIS INSTANCE HAS MULTIPLE PROPERTIES WITH SAME VERB **/
            foreach ($propertiesArr as $index2 => $onePropertyArr) {
                if (empty($onePropertyArr)) {
                    continue;
                }
                $verb = key($onePropertyArr);
                $objectClassArr = current($onePropertyArr);
                $objectConceptName = stripOntologyNamespace($objectClassArr[0]);
                //echoN("***** $verb -> $objectConceptName");
                $attributedArr = next($onePropertyArr);
                $freq = $attributedArr['frequency'];
                $engTranslation = $attributedArr['verb_translation_en'];
                $verbUthmani = $attributedArr['verb_uthmani'];
                $relHashID = buildRelationHashID($subjectConceptName, $verb, $objectConceptName);
                $qaOntologyRelationsArr[$relHashID] = array("SUBJECT" => $subjectConceptName, "VERB" => $verb, "OBJECT" => $objectConceptName, "FREQUENCY" => $freq, "VERB_TRANSLATION_EN" => $engTranslation, "VERB_UTHMANI" => $verbUthmani);
                $relationsCount++;
            }
            // if it is class dont make it instance even if it is a subject (subclass of another class
            // BUG: animal was not apearing on ontology graph page since it was instance
            if (empty($qaOntologyConceptsArr[$subjectConceptName]) || $qaOntologyConceptsArr[$subjectConceptName][type] != 'class') {
                $qaOntologyConceptsArr[$subjectConceptName] = array("type" => "instance");
            }
        }
    }
    foreach ($qaOntologyConceptsArr as $conceptName => $infoArr) {
        $fullConceptName = $qaOntologyNamespace . $conceptName;
        $labelsArr = $ontology->{'owl_data'}['labels'][$fullConceptName];
        foreach ($labelsArr as $labelLang => $label) {
            /*if ( mb_strlen($label)==1)
            		{
            			echon($fullConceptName);
            			preprint_r($ontology->{'owl_data'}['labels'][$fullConceptName]);
            		}*/
            $qaOntologyConceptsArr[$conceptName]['label_' . strtolower($labelLang)] = $label;
        }
        // "Thing" does not have annotations
        if (isset($ontology->{'owl_data'}['annotations'][$fullConceptName])) {
            $annotationsArr = $ontology->{'owl_data'}['annotations'][$fullConceptName];
            foreach ($annotationsArr as $index => $annotArr) {
                $key = $annotArr['KEY'];
                $val = $annotArr['VAL'];
                $qaOntologyConceptsArr[$conceptName][$key] = $val;
                //echoN("[$conceptName][$key] = $val");
            }
        }
    }
    ////////// OUTPUT STATS
    /*echoN("INSTANCES COUNT:".count($ontology->{'owl_data'}['instances']));
    	echoN("CLASSES COUNT:".count($ontology->{'owl_data'}['classes']));
    	echoN("PROPERTIES COUNT - DECLERATIONS ONLY:".count($ontology->{'owl_data'}['properties']));;
    	echoN("CONCEPTS COUNT:".count($qaOntologyConceptsArr));
    	echoN("RELATIONS COUNT:".$relationsCount);
    	preprint_r($qaOntologyRelationsArr);*/
    //////////////////
    ///////////// QUALITY CHECK CONCEPTS
    $qaOntologyConceptsArr2 = array();
    foreach ($qaOntologyConceptsArr as $key => $val) {
        $newKey = strtr($key, "_", " ");
        $qaOntologyConceptsArr2[$newKey] = $value;
    }
    $ONTOLOGY_EXTRACTION_FOLDER = "../data/ontology/extraction/";
    $finalConcepts = unserialize(file_get_contents("{$ONTOLOGY_EXTRACTION_FOLDER}/temp.final.concepts.final"));
    $diffArr = array_diff(array_keys($qaOntologyConceptsArr2), array_keys($finalConcepts));
    $conceptsDiffCount = count($matchingTable);
    if ($relationsDiffCount > 0) {
        echoN("<b>### OWL-PROPRIETARY-CONCEPTS-DIFF-COUNT:</b>" . $conceptsDiffCount);
    }
    //preprint_r($diffArr);
    //////////////////////////////////////////////////////////////
    //////// quality check relations
    $relationsArr = unserialize(file_get_contents("{$ONTOLOGY_EXTRACTION_FOLDER}/temp.final.relations"));
    $matchingTable = array();
    foreach ($qaOntologyRelationsArr as $index => $relArr) {
        $trippleStr = $relArr['SUBJECT'] . "->" . $relArr['VERB'] . "->" . $relArr['OBJECT'];
        //since Thing relations are not in the list we are comparing with
        if ($relArr['OBJECT'] == $thing_class_name_ar) {
            continue;
        }
        //echoN($trippleStr);
        $trippleStr = trim($trippleStr);
        $matchingTable[$trippleStr]++;
    }
    foreach ($relationsArr as $index => $relArr) {
        $relArr['SUBJECT'] = strtr($relArr['SUBJECT'], " ", "_");
        $relArr['VERB'] = strtr($relArr['VERB'], " ", "_");
        $relArr['OBJECT'] = strtr($relArr['OBJECT'], " ", "_");
        $trippleStr = $relArr['SUBJECT'] . "->" . $relArr['VERB'] . "->" . $relArr['OBJECT'];
        $trippleStr = trim($trippleStr);
        $matchingTable[$trippleStr]++;
    }
    function filterFunc($v)
    {
        return $v <= 1;
    }
    $matchingTable = array_filter($matchingTable, 'filterFunc');
    $relationsDiffCount = count($matchingTable);
    if ($relationsDiffCount > 0) {
        echoN("<b>### OWL-PROPRIETARY-RELATIONS-DIFF-COUNT:</b>" . $relationsDiffCount);
        preprint_r($matchingTable);
    }
    //////////////////////////////////////////////
    //echoN( join("<br>",array_keys($qaOntologyConceptsArr)));
    $qaOntologyVerbIndex = array();
    $qaOntologyGraphSourcesIndex = array();
    $qaOntologyGraphTargetsIndex = array();
    //preprint_r($qaOntologyRelationsArr);
    //exit;
    foreach ($qaOntologyRelationsArr as $index => $relArr) {
        $subject = $relArr['SUBJECT'];
        $verb = $relArr['VERB'];
        $verb_translation_en = $relArr['VERB_TRANSLATION_EN'];
        $object = $relArr['OBJECT'];
        //$qaOntologyVerbIndex[$verb][]=array("SUBJECT"=>$subject,"OBJECT"=>$object);
        //$qaOntologyVerbIndex[$verb_translation_en][]=array("SUBJECT"=>$subject,"OBJECT"=>$object);
        addValueToMemoryModel("ALL", "MODEL_QA_ONTOLOGY", "VERB_INDEX", $verb, array("SUBJECT" => $subject, "OBJECT" => $object));
        addValueToMemoryModel("ALL", "MODEL_QA_ONTOLOGY", "VERB_INDEX", $verb_translation_en, array("SUBJECT" => $subject, "OBJECT" => $object));
        //$qaOntologyGraphSourcesIndex[$subject][]=array("link_verb"=>$verb,"target"=>$object,"relation_index"=>$index);
        //$qaOntologyGraphTargetsIndex[$object][]=array("source"=>$subject,"link_verb"=>$verb,"relation_index"=>$index);
        addToMemoryModelList("ALL", "MODEL_QA_ONTOLOGY", "GRAPH_INDEX_SOURCES", $subject, array("link_verb" => $verb, "target" => $object, "relation_index" => $index));
        addToMemoryModelList("ALL", "MODEL_QA_ONTOLOGY", "GRAPH_INDEX_TARGETS", $object, array("source" => $subject, "link_verb" => $verb, "relation_index" => $index));
    }
    $qaOntologyConceptsENtoARMapArr = array();
    foreach ($qaOntologyConceptsArr as $arName => $conceptArr) {
        $enLabel = trim(strtolower($conceptArr['label_en']));
        //$qaOntologyConceptsENtoARMapArr[$enLabel]=$arName;
        //$qaOntologyConceptsENtoARMapArr[$enLabel]=$arName;
        addValueToMemoryModel("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS_EN_AR_NAME_MAP", $enLabel, $arName);
    }
    $qaSynonymsIndex = array();
    foreach ($qaOntologyConceptsArr as $arName => $conceptArr) {
        addValueToMemoryModel("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $arName, $conceptArr);
        $i = 1;
        while (isset($conceptArr['synonym_' . $i])) {
            if (empty($conceptArr['synonym_' . $i])) {
                $i++;
                continue;
            }
            $synonymLabel = trim(strtolower($conceptArr['synonym_' . $i]));
            $qaSynonymsIndex[$synonymLabel] = $arName;
            addValueToMemoryModel("ALL", "MODEL_QA_ONTOLOGY", "SYNONYMS_INDEX", $synonymLabel, $arName);
            $i++;
        }
    }
    //preprint_r($qaOntologyConceptsArr);exit;
    //$MODEL_QA_ONTOLOGY['CONCEPTS'] = $qaOntologyConceptsArr;
    //$MODEL_QA_ONTOLOGY['RELATIONS'] = $qaOntologyRelationsArr;
    addValueToMemoryModel("ALL", "MODEL_QA_ONTOLOGY", "RELATIONS", "", $qaOntologyRelationsArr);
    //$MODEL_QA_ONTOLOGY['GRAPH_INDEX_SOURCES'] = $qaOntologyGraphSourcesIndex;
    //$MODEL_QA_ONTOLOGY['GRAPH_INDEX_TARGETS'] = $qaOntologyGraphTargetsIndex;
    //$MODEL_QA_ONTOLOGY['CONCEPTS_EN_AR_NAME_MAP'] = $qaOntologyConceptsENtoARMapArr;
    //$MODEL_QA_ONTOLOGY['VERB_INDEX']  = $qaOntologyVerbIndex;
    //$MODEL_QA_ONTOLOGY['SYNONYMS_INDEX']  = $qaSynonymsIndex;
    //$res = apc_store("MODEL_QA_ONTOLOGY",$MODEL_QA_ONTOLOGY);
    //if ( $res===false){ throw new Exception("Can't cache MODEL_QA_ONTOLOGY"); }
    //preprint_r($MODEL_QA_ONTOLOGY);exit;
    //////// END ONTOLOGY LOADING
    ////////////////////////////
    /// WORDNET
    loadWordnet($MODEL_WORDNET);
    /////////////
    //free resources
    $quranMetaDataXMLObj = null;
    unset($quranMetaDataXMLObj);
    foreach ($modelSources as $supportedLang => $modelSourceArr) {
        $type = $modelSourceArr['type'];
        $file = $modelSourceArr['file'];
        //echoN("$lang $type $file");
        loadModel($supportedLang, $type, $file);
        //not working
        $gced = gc_collect_cycles();
        //echoN($gced);
    }
    //echoN(json_encode($MODEL));
    ############ Uthmani/Simple mapping table #################
    ############ AND WORD-WORD TRANSLATION AND TRANSLITERATION #################
    $pauseMarksArr = getPauseMarksArrByFile($pauseMarksFile);
    $wordByWordFileArr = file($wordByWordTranslationFile, FILE_SKIP_EMPTY_LINES | FILE_IGNORE_NEW_LINES);
    $translitertationArr = file($transliterationFile, FILE_SKIP_EMPTY_LINES | FILE_IGNORE_NEW_LINES);
    $WORD_SENSES_EN = array();
    $WORD_SENSES_AR = array();
    $quranTextEntryFromAPC_AR = getModelEntryFromMemory("AR", "MODEL_CORE", "QURAN_TEXT", "");
    $quranTextEntryFromAPC_UTH = getModelEntryFromMemory("AR_UTH", "MODEL_CORE", "QURAN_TEXT", "");
    /* SURA'S LOOP **/
    for ($s = 0; $s < $numberOfSuras; $s++) {
        $suraSize = count($quranTextEntryFromAPC_AR[$s]);
        /* VERSES LOOP **/
        for ($a = 0; $a < $suraSize; $a++) {
            $i++;
            $verseTextSimple = $quranTextEntryFromAPC_AR[$s][$a];
            $simpleWordsArr = preg_split("/ /", $verseTextSimple);
            $verseTextUthmani = $quranTextEntryFromAPC_UTH[$s][$a];
            $uthmaniWordsArr = preg_split("/ /", $verseTextUthmani);
            $simpleWordsArr = removePauseMarksFromArr($pauseMarksArr, $simpleWordsArr);
            $uthmaniWordsArr = removePauseMarksFromArr($pauseMarksArr, $uthmaniWordsArr);
            $verseLocation = $s + 1 . ":" . ($a + 1);
            $UTHMANI_TO_SIMPLE_LOCATION_MAP[$verseLocation] = array();
            ///////// Transliteration /////////////
            $transliterationLine = current($translitertationArr);
            next($translitertationArr);
            $lineParts = preg_split("/\\|/", $transliterationLine);
            $verseTransliteration = $lineParts[2];
            //echoN($transliterationLine);
            $TRANSLITERATION_VERSES_MAP[$verseLocation] = $verseTransliteration;
            $wordsTransliterationArr = preg_split("/ /", $verseTransliteration);
            // preprint_r($wordsTransliterationArr);exit;
            /////////////////////////////////////////////////
            $wtwIndex = 0;
            foreach ($uthmaniWordsArr as $index => $wordUthmani) {
                $qacMasterID = $s + 1 . ":" . ($a + 1) . ":" . ($index + 1);
                $qacMasterTableEntry = getModelEntryFromMemory("AR", "MODEL_QAC", "QAC_MASTERTABLE", $qacMasterID);
                $lemma = $qacMasterTableEntry[0]['FEATURES']['LEM'];
                // to handle multi segment words such as الدنيا
                if (empty($lemma)) {
                    $lemma = $qacMasterTableEntry[1]['FEATURES']['LEM'];
                }
                //echoN("|$lemma|$wordUthmani");
                //$wtwIndex (INDEX_IN_AYA_EMLA2Y) needs to be 1 based  ( UTHMANI=IMLA2Y )
                $UTHMANI_TO_SIMPLE_LOCATION_MAP[$s + 1 . ":" . ($a + 1)][$index + 1] = $wtwIndex + 1;
                $wordSimple = $simpleWordsArr[$wtwIndex++];
                //$UTHMANI_TO_SIMPLE_LOCATION_MAP[($s+1).":".($a+1)][($index+1)."-".$wordUthmani]=($wtwIndex)."-".$wordSimple;
                /* for ayas which are different in size, do the following
                 * if the current word is  ويا  or  ها or   يا
                 * then join it with the next word and make them one word
                 */
                if (count($uthmaniWordsArr) != count($simpleWordsArr) && ($wordSimple == "يا" || $wordSimple == "ها" || $wordSimple == "ويا" || $wordUthmani == "وَأَلَّوِ")) {
                    if ($wordUthmani == "يَبْنَؤُمَّ") {
                        // example 0 => 1
                        $UTHMANI_TO_SIMPLE_LOCATION_MAP[$s + 1 . ":" . ($a + 1)][$index + 1] = $wtwIndex + 1;
                        //[($index+1)."-".$wordUthmani]=($wtwIndex+1)."-".$wordSimple;
                        $wordSimple = $wordSimple . " " . $simpleWordsArr[$wtwIndex++] . " " . $simpleWordsArr[$wtwIndex++];
                    } else {
                        // example 0 => 1
                        $UTHMANI_TO_SIMPLE_LOCATION_MAP[$s + 1 . ":" . ($a + 1)][$index + 1] = $wtwIndex + 1;
                        //[($index+1)."-".$wordUthmani]=($wtwIndex+1)."-".$wordSimple;
                        $wordSimple = $wordSimple . " " . $simpleWordsArr[$wtwIndex++];
                    }
                    //echoN("$wordUthmani:$wordSimple");
                }
                //	printHTMLPageHeader();
                //	echoN("$wordSimple|$wordUthmani");
                ///////// english translation ////////
                $wordByWordTranslationLine = current($wordByWordFileArr);
                next($wordByWordFileArr);
                $linePartsArr = preg_split("/\\|/", $wordByWordTranslationLine);
                $englishTranslationForCurrentWord = $linePartsArr[5];
                /////////////////////////////////////////////////
                $WORD_SENSES_EN[$englishTranslationForCurrentWord][$wordUthmani]++;
                $WORD_SENSES_AR[$wordUthmani][$englishTranslationForCurrentWord]++;
                $TRANSLATION_MAP_EN_TO_AR[$englishTranslationForCurrentWord] = $wordUthmani;
                $TRANSLATION_MAP_AR_TO_EN[$wordUthmani] = $englishTranslationForCurrentWord;
                $TRANSLITERATION_WORDS_MAP[$wordUthmani] = $wordsTransliterationArr[$index];
                $clenaedTranliteration = cleanTransliteratedText($wordsTransliterationArr[$index]);
                $TRANSLITERATION_WORDS_INDEX[$clenaedTranliteration] = 1;
                $TRANSLITERATION_WORDS_LOCATION_MAP["{$s}:{$a}:{$index}"] = $wordsTransliterationArr[$index];
                //preprint_r($TRANSLITERATION_WORDS_LOCATION_MAP);
                // preprint_r($TRANSLATION_MAP_AR_TO_EN);
                // preprint_r($TRANSLITERATION_WORDS_MAP);
                $UTHMANI_TO_SIMPLE_WORD_MAP[$wordUthmani] = $wordSimple;
                addValueToMemoryModel("AR", "OTHERS", "UTHMANI_TO_SIMPLE_WORD_MAP", $wordUthmani, $wordSimple);
                $UTHMANI_TO_SIMPLE_WORD_MAP[$wordSimple] = $wordUthmani;
                addValueToMemoryModel("AR", "OTHERS", "UTHMANI_TO_SIMPLE_WORD_MAP", $wordSimple, $wordUthmani);
                if (!empty($lemma)) {
                    if (!isset($LEMMA_TO_SIMPLE_WORD_MAP[$lemma])) {
                        $LEMMA_TO_SIMPLE_WORD_MAP[$lemma] = $wordSimple;
                    } else {
                        $oldSimple = $LEMMA_TO_SIMPLE_WORD_MAP[$lemma];
                        if (myLevensteinEditDistance($oldSimple, $lemma) > myLevensteinEditDistance($wordSimple, $lemma)) {
                            $LEMMA_TO_SIMPLE_WORD_MAP[$lemma] = $wordSimple;
                        }
                    }
                }
            }
        }
    }
    /////// ADD UTHMANI TO SIMPLE LOCATION MAP TO MEMORY
    foreach ($UTHMANI_TO_SIMPLE_LOCATION_MAP as $verseLocation => $verseMappingArr) {
        /*foreach($mappingArr as $uhtmaniIndex=>$imal2yIndex)
        		{
        			
        		}*/
        addValueToMemoryModel("AR", "OTHERS", "UTHMANI_TO_SIMPLE_LOCATION_MAP", $verseLocation, $verseMappingArr);
    }
    ///////////////////////////////////////////////////////
    //preprint_r($TRANSLATION_MAP_EN_TO_AR);exit;
    //preprint_r($WORD_SENSES_AR);exit;
    // CAN'T BE ADDED IN THE CORE_MODEL since the mapping happens after loadModel
    //$res = apc_store("UTHMANI_TO_SIMPLE_WORD_MAP",$UTHMANI_TO_SIMPLE_WORD_MAP);
    //if ( $res===false){ throw new Exception("Can't cache UTHMANI_TO_SIMPLE_WORD_MAP"); }
    //$res = apc_store("UTHMANI_TO_SIMPLE_LOCATION_MAP",$UTHMANI_TO_SIMPLE_LOCATION_MAP);
    //if ( $res===false){ throw new Exception("Can't cache UTHMANI_TO_SIMPLE_LOCATION_MAP"); }
    $res = apc_store("LEMMA_TO_SIMPLE_WORD_MAP", $LEMMA_TO_SIMPLE_WORD_MAP);
    if ($res === false) {
        throw new Exception("Can't cache LEMMA_TO_SIMPLE_WORD_MAP");
    }
    $res = apc_store("WORDS_TRANSLATIONS_EN_AR", $TRANSLATION_MAP_EN_TO_AR);
    if ($res === false) {
        throw new Exception("Can't cache WORDS_TRANSLATIONS_EN_AR");
    }
    $res = apc_store("WORDS_TRANSLATIONS_AR_EN", $TRANSLATION_MAP_AR_TO_EN);
    if ($res === false) {
        throw new Exception("Can't cache WORDS_TRANSLATIONS_AR_EN");
    }
    $res = apc_store("WORDS_TRANSLITERATION", $TRANSLITERATION_WORDS_MAP);
    if ($res === false) {
        throw new Exception("Can't cache WORDS_TRANSLITERATION");
    }
    $res = apc_store("TRANSLITERATION_WORDS_LOCATION_MAP", $TRANSLITERATION_WORDS_LOCATION_MAP);
    if ($res === false) {
        throw new Exception("Can't cache TRANSLITERATION_WORDS_LOCATION_MAP");
    }
    $res = apc_store("TRANSLITERATION_VERSES_MAP", $TRANSLITERATION_VERSES_MAP);
    if ($res === false) {
        throw new Exception("Can't cache TRANSLITERATION_VERSES_MAP");
    }
    $res = apc_store("TRANSLITERATION_WORDS_INDEX", $TRANSLITERATION_WORDS_INDEX);
    if ($res === false) {
        throw new Exception("Can't cache TRANSLITERATION_WORDS_INDEX");
    }
    $res = apc_store("WORD_SENSES_EN", $WORD_SENSES_EN);
    if ($res === false) {
        throw new Exception("Can't cache WORD_SENSES_EN");
    }
    $res = apc_store("WORD_SENSES_AR", $WORD_SENSES_AR);
    if ($res === false) {
        throw new Exception("Can't cache {$WORD_SENSES_AR}");
    }
    //// ENRICH INVERTED INDEX BY UTHMANI-EMLA2Y INDEXES
    //echoN(count($MODEL_SEARCH['AR']['INVERTED_INDEX']));
    foreach (getAPCIterator("AR\\/MODEL_SEARCH\\/INVERTED_INDEX\\/.*") as $invertedIndexCursor) {
        $wordDataArr = $invertedIndexCursor['value'];
        $key = $invertedIndexCursor['key'];
        $word = getEntryKeyFromAPCKey($key);
        foreach ($wordDataArr as $index => $documentArrInIndex) {
            $WORD_TYPE = $documentArrInIndex['WORD_TYPE'];
            $SURA = $documentArrInIndex['SURA'];
            $AYA = $documentArrInIndex['AYA'];
            //echoN($word." ".$WORD_TYPE);
            if ($WORD_TYPE == "NORMAL_WORD") {
                $INDEX_IN_AYA_EMLA2Y = $documentArrInIndex['INDEX_IN_AYA_EMLA2Y'];
                foreach ($UTHMANI_TO_SIMPLE_LOCATION_MAP[$SURA + 1 . ":" . ($AYA + 1)] as $uhtmaniIndex => $imal2yIndex) {
                    if ($imal2yIndex == $INDEX_IN_AYA_EMLA2Y) {
                        $INDEX_IN_AYA_UTHMANI = $uhtmaniIndex;
                        break;
                    }
                }
                //echoN($INDEX_IN_AYA_UTHMANI);
                $wordDataArr[$index]['INDEX_IN_AYA_UTHMANI'] = $INDEX_IN_AYA_UTHMANI;
            } else {
                // needed for highlighting pronoun charcters in search
                $INDEX_IN_AYA_UTHMANI = $documentArrInIndex['INDEX_IN_AYA_UTHMANI'];
                $INDEX_IN_AYA_EMLA2Y = getSimpleWordIndexByUthmaniWordIndex($SURA + 1 . ":" . ($AYA + 1), $INDEX_IN_AYA_UTHMANI);
                $wordDataArr[$index]['INDEX_IN_AYA_EMLA2Y'] = $INDEX_IN_AYA_EMLA2Y;
            }
        }
        //UPDATE
        updateModelData($key, $wordDataArr);
    }
    //$res = apc_store("MODEL_SEARCH[AR]",$MODEL_SEARCH['AR']);
    //if ( $res===false){ throw new Exception("Can't cache MODEL_SEARCH[AR]"); }
    //preprint_r($TRANSLITERATION_WORDS_LOCATION_MAP);
    /// ADD TRANSLITERATION TO INVERETD INDEX WWITH ENGLISH WORDS
    if ($lang == "EN") {
        $invertedIndexBatchApcArr = array();
        foreach ($TRANSLITERATION_WORDS_LOCATION_MAP as $location => $transliteratedWord) {
            $locationArr = explode(":", $location);
            $s = $locationArr[0];
            $a = $locationArr[1];
            $wordIndex = $locationArr[2];
            //echoN("$transliteratedWord,$s,$a,$wordIndex");
            $transliteratedWord = strtolower(strip_tags($transliteratedWord));
            //$MODEL_SEARCH['EN']['INVERTED_INDEX'][$word]
            addToInvertedIndex($invertedIndexBatchApcArr, $lang, $transliteratedWord, $s, $a, $wordIndex, "NORMAL_WORD");
        }
        addToMemoryModelBatch($invertedIndexBatchApcArr);
        //$res = apc_store("MODEL_SEARCH[EN]",$MODEL_SEARCH['EN']);
    }
    //if ( $res===false){ throw new Exception("Can't cache MODEL_SEARCH[EN]"); }
    /////////////////////////////////////////////////////////
    //preprint_r($UTHMANI_TO_SIMPLE_WORD_MAP);
    //preprint_r($MODEL_CORE["AR_UTH"]['QURAN_TEXT']);exit;
    ##############################################################
    // get memory usage
    $debug = memory_get_usage(true) / 1024 / 1024 . "/" . memory_get_peak_usage(true) / 1024 / 1024 . "Memory <br>";
    //echoN($debug);
    //needed to be set here after both languages has been loaded
    // reload all models from memory to set all variables (WORDNET) - after model generation
    /* needed to reload all generated models from memory specialy model_core since 
     * it has 3 languages, if this line is removed: all 3 langauges are loaded although only one language 
     * is requested, also it caused a bug in getPoSTaggedSubsentences
     */
    //loadModels($modelsToBeLoaded,$lang);
}
Exemplo n.º 7
0
function extendQueryWordsByDerivations($taggedSignificantWords, $lang)
{
    foreach ($taggedSignificantWords as $word => $posTag) {
        // avoid small words, will lead to too many iirelevant derivations
        if (mb_strlen($word) <= 2) {
            continue;
        }
        if ($lang == "EN") {
            if ($posTag == "NN") {
                $plural = $word . "s";
                $taggedSignificantWords[$plural] = "{$posTag}" . "S";
            } else {
                if ($posTag == "NNS") {
                    $single = substr($word, 0, -1);
                    $taggedSignificantWords[$single] = substr($posTag, 0, -1);
                }
            }
        } else {
            $simmlarWords = array();
            $qaOntologyConceptsIterator = getAPCIterator("ALL\\/MODEL_QA_ONTOLOGY\\/CONCEPTS\\/.*");
            foreach ($qaOntologyConceptsIterator as $conceptsCursor) {
                $conceptID = getEntryKeyFromAPCKey($conceptsCursor['key']);
                $mainConceptArr = $conceptsCursor['value'];
                $conceptLabelAR = $mainConceptArr['label_ar'];
                $dist = myLevensteinEditDistance($word, $conceptLabelAR);
                if ($dist <= 5) {
                    $dist = getDistanceByCommonUniqueChars($word, $conceptLabelAR);
                    $simmlarWords[$conceptLabelAR] = $dist;
                }
                $i = 1;
                while (isset($mainConceptArr['synonym_' . $i]) && isArabicString($mainConceptArr['synonym_' . $i])) {
                    $synonym = $mainConceptArr['synonym_' . $i];
                    $dist = myLevensteinEditDistance($word, $synonym);
                    if ($dist <= 5) {
                        $dist = getDistanceByCommonUniqueChars($word, $synonym);
                        $simmlarWords[$synonym] = $dist;
                    }
                    $i++;
                }
            }
            foreach ($simmlarWords as $conceptWord => $distBySimChars) {
                $diff = mb_strlen($conceptWord) - mb_strlen($word);
                $absDiffSize = abs($diff);
                // $word is bigger
                if ($diff < 0) {
                    $absDiffSize = abs($diff);
                    $diffStr = getStringDiff($conceptWord, $word);
                    //echoN($diffStr);
                    //حيوان => الحيوانات
                    // the bigger word should not contain space الله => سبيل الله
                    // $diffStr=="الات" for حيوان = الحياوانات
                    if (mb_strpos($word, $conceptWord) !== false && strpos($word, " ") === false && ($diffStr == "ات" || $diffStr == "ال" || $diffStr == "الات")) {
                        //echoN("$word, $conceptWord");
                        /// convert word to noun plular
                        $taggedSignificantWords[$word] = "NNS";
                        // concept word is singular
                        $taggedSignificantWords[$conceptWord] = "NN";
                    } else {
                        if ($diff == 1) {
                            $wordLastCharTrimmed = mb_substr($conceptWord, 0, -1);
                            if ($wordLastCharTrimmed . "ات" == $word) {
                                /// convert word to noun plular
                                $taggedSignificantWords[$word] = "NNS";
                                /// convert word to noun plular
                                $taggedSignificantWords[$conceptWord] = "NN";
                            }
                        }
                    }
                } else {
                    $diffStr = getStringDiff($conceptWord, $word);
                    //  الحيوانات => حيوان
                    // the bigger word should not contain space الله => سبيل الله
                    if ($diff != 0 && mb_strpos($conceptWord, $word) !== false && strpos($conceptWord, " ") === false && ($diffStr == "ات" || $diffStr == "ال" || $diffStr == "الات")) {
                        //echoN("$word,$conceptWord");
                        /// convert word to noun plular
                        $taggedSignificantWords[$conceptWord] = "NNS";
                    } else {
                        if ($diff == 1) {
                            $wordLastCharTrimmed = mb_substr($word, 0, -1);
                            if ($wordLastCharTrimmed . "ات" == $conceptWord) {
                                /// convert word to noun plular
                                $taggedSignificantWords[$conceptWord] = "NNS";
                            }
                        }
                    }
                }
            }
            // limit the number of derivations+original terms to 15
            $taggedSignificantWords = array_slice($taggedSignificantWords, 0, 10);
            //arsort($simmlarWords);
            //preprint_r($taggedSignificantWords);
            //preprint_r($simmlarWords);
            //exit;
        }
    }
    //preprint_r($taggedSignificantWords);
    return $taggedSignificantWords;
}